---
title: "Replication Output for: Can the Government Deter Discrimination? Evidence from a Randomized Intervention in New York City"
author: "Albert H. Fang, Andrew M. Guess, and Macartan Humphreys"
date: "December 2, 2017"
output:
  html_document:
    toc: true
    number_sections: yes
    theme: cosmo
---

**IMPORTANT NOTES (PLEASE READ FIRST):**

* This Rmd file produces the tables, figures, and analyses reported in the manuscript and in the online appendix. This output file shows the core code and analyses used, as well as the main output from these analyses.
* At various points in this Rmd file, we have set `echo=F` and `eval=F` in selected R code chunks to suppress displaying tex output in the Rmd output. To have these code chunks produce .tex table output files, set `eval=T`. 
* You must specify your local root directory (in the first code chunk). 
* The code in `code_0_read_data.R` currently loads data from the local directory. If you want to load data from Dataverse, in the second code chunk you must (1) set `local_data=FALSE` and (2) set your Dataverse API token in the line `Sys.setenv("DATAVERSE_KEY" = "ENTER-YOUR-API-TOKEN-HERE")`.

```{r, include=F}
  rm(list=ls(all=TRUE))
  require("knitr")
  ### SET LOCAL ROOT DIRECTORY - Replace root.dir with your local root directory path.
  opts_knit$set(root.dir = "~/Dropbox/NYC Housing Study - Replication Files/Replicate/")
  opts_chunk$set(warning = F)
  options(knitr.kable.NA = '')
```

```{r, include=F}
  # Packages
  pkgs <- c("plyr","dplyr", "reshape2", "tidyr", "broom", "xtable",
            "weights","arm", "purrr", "glmnet", "animation", "maptools",
            "RColorBrewer", "AER", "Hmisc", "gtools","bootstrap","glmnet",
            "lme4", "nnet", "ggplot2", "stargazer", "stringr", "readr", "dataverse")

  # Install packages if not yet installed
  lapply(pkgs, function(x){
    if(x %in% rownames(installed.packages()) == FALSE) {install.packages(x, dependencies=TRUE, repos="https://cloud.r-project.org/")}
  })
  
  # Load packages
  library(plyr); library(dplyr); library(reshape2); library(tidyr); library(broom); library(xtable)
  library(weights); library(arm); library(purrr); library(glmnet); library(animation); library(maptools)
  library(RColorBrewer); library(AER); library(Hmisc); library(gtools); library(bootstrap); library(lme4)
  library(nnet); library(ggplot2); library(stargazer); library(stringr); library(readr); library(dataverse)

  # Set seed
  set.seed(20150229)

  # Load data locally or from dataverse
  # -- if TRUE: Data must be saved in the local root directory (specified in above code chunk)
  # -- if FALSE: Data will load from Dataverse; you must have a Dataverse API Token
  local_data <- TRUE
  if(!local_data){
    Sys.setenv("DATAVERSE_KEY" = "ENTER-YOUR-API-TOKEN-HERE")  ##### Put your Dataverse API Token here.
    Sys.setenv("DATAVERSE_SERVER" = "dataverse.harvard.edu")
  }
  source("code_0_read_data.R")

  # Load helper functions and prepare data
  source("code_1a_prep_data.R") 
  source("code_1b_helper_functions.R") 
```

# Tables and Figures in Main Text

## Figure 1: Main Results on Discrimination Levels and Treatment Effects

### Code for Analyses

```{r figure1analysis, echo=T, eval=T, warning=F}
# ---------------------------------------------------------------------- #
# DEFINE COLNAMES FOR OUTPUT TABLES

col.labels <- c("Outcome",c("Estimate","SE","t","P"))

# ---------------------------------------------------------------------- #
# MODELS WITH ONLY BLOCK FIXED EFFECTS

itt.wb.mc <- matrix(NA, nrow=length(outvars.wb), ncol=5)
itt.wb.pc <- matrix(NA, nrow=length(outvars.wb), ncol=5)
itt.wb.pm <- matrix(NA, nrow=length(outvars.wb), ncol=5)

itt.wh.mc <- matrix(NA, nrow=length(outvars.wh), ncol=5)
itt.wh.pc <- matrix(NA, nrow=length(outvars.wh), ncol=5)
itt.wh.pm <- matrix(NA, nrow=length(outvars.wh), ncol=5)

itt.bh.mc <- matrix(NA, nrow=length(outvars.bh), ncol=5)
itt.bh.pc <- matrix(NA, nrow=length(outvars.bh), ncol=5)
itt.bh.pm <- matrix(NA, nrow=length(outvars.bh), ncol=5)

fit.wb.mc <- fit.wb.pc <- fit.wb.pm <- fit.wh.mc <- fit.wh.pc <- fit.wh.pm <- fit.bh.mc <- fit.bh.pc <- fit.bh.pm <- list() # store results from ITT est w/ block FE (no other covs)

for(i in 1:length(outvars.wb)){
  model.mc <- paste(outvars.wb[i]," ~ TA1 + as.factor(block)",sep="")
  model.pc <- paste(outvars.wb[i]," ~ TA2 + as.factor(block)",sep="")
  model.pm <- paste(outvars.wb[i]," ~ TA2 + as.factor(block)",sep="")

  fit.wb.mc[[i]] <- fit.mc <- lm(formula=model.mc, data=dat[dat$TA %in% c(0,1),], weights=ipw10)
  fit.wb.pc[[i]] <- fit.pc <- lm(formula=model.pc, data=dat[dat$TA %in% c(0,2),], weights=ipw20)
  fit.wb.pm[[i]] <- fit.pm <- lm(formula=model.pm, data=dat[dat$TA %in% c(1,2),], weights=ipw21)

  itt.wb.mc[i,2:5] <- summary(fit.mc)$coefficients[2,]
  itt.wb.pc[i,2:5] <- summary(fit.pc)$coefficients[2,]
  itt.wb.pm[i,2:5] <- summary(fit.pm)$coefficients[2,]

  itt.wb.mc[i,5] <- pt(coef(summary(fit.mc))[,3], summary(fit.mc)$df[2], lower=TRUE)[2] #one sided p
  itt.wb.pc[i,5] <- pt(coef(summary(fit.pc))[,3], summary(fit.pc)$df[2], lower=TRUE)[2] #one sided p
}

for(i in 1:length(outvars.wh)){
  model.mc <- paste(outvars.wh[i]," ~ TA1 + as.factor(block)",sep="")
  model.pc <- paste(outvars.wh[i]," ~ TA2 + as.factor(block)",sep="")
  model.pm <- paste(outvars.wh[i]," ~ TA2 + as.factor(block)",sep="")

  fit.wh.mc[[i]] <- fit.mc <- lm(formula=model.mc, data=dat[dat$TA %in% c(0,1),], weights=ipw10)
  fit.wh.pc[[i]] <- fit.pc <- lm(formula=model.pc, data=dat[dat$TA %in% c(0,2),], weights=ipw20)
  fit.wh.pm[[i]] <- fit.pm <- lm(formula=model.pm, data=dat[dat$TA %in% c(1,2),], weights=ipw21)

  itt.wh.mc[i,2:5] <- summary(fit.mc)$coefficients[2,]
  itt.wh.pc[i,2:5] <- summary(fit.pc)$coefficients[2,]
  itt.wh.pm[i,2:5] <- summary(fit.pm)$coefficients[2,]

  itt.wh.mc[i,5] <- pt(coef(summary(fit.mc))[,3], summary(fit.mc)$df[2], lower=TRUE)[2] #one sided p
  itt.wh.pc[i,5] <- pt(coef(summary(fit.pc))[,3], summary(fit.pc)$df[2], lower=TRUE)[2] #one sided p
}

for(i in 1:length(outvars.bh)){
  model.mc <- paste(outvars.bh[i]," ~ TA1 + as.factor(block)",sep="")
  model.pc <- paste(outvars.bh[i]," ~ TA2 + as.factor(block)",sep="")
  model.pm <- paste(outvars.bh[i]," ~ TA2 + as.factor(block)",sep="")

  fit.bh.mc[[i]] <- fit.mc <- lm(formula=model.mc, data=dat[dat$TA %in% c(0,1),], weights=ipw10)
  fit.bh.pc[[i]] <- fit.pc <- lm(formula=model.pc, data=dat[dat$TA %in% c(0,2),], weights=ipw20)
  fit.bh.pm[[i]] <- fit.pm <- lm(formula=model.pm, data=dat[dat$TA %in% c(1,2),], weights=ipw21)

  itt.bh.mc[i,2:5] <- summary(fit.mc)$coefficients[2,]
  itt.bh.pc[i,2:5] <- summary(fit.pc)$coefficients[2,]
  itt.bh.pm[i,2:5] <- summary(fit.pm)$coefficients[2,]
}

# Add outcome measure labels
itt.wb.mc[,1] <- outvars.wb.labs
itt.wb.pc[,1] <- outvars.wb.labs
itt.wb.pm[,1] <- outvars.wb.labs

itt.wh.mc[,1] <- outvars.wh.labs
itt.wh.pc[,1] <- outvars.wh.labs
itt.wh.pm[,1] <- outvars.wh.labs

itt.bh.mc[,1] <- outvars.bh.labs
itt.bh.pc[,1] <- outvars.bh.labs
itt.bh.pm[,1] <- outvars.bh.labs

# Add column names
colnames(itt.wb.mc) <- colnames(itt.wb.pc) <- colnames(itt.wb.pm) <- col.labels
colnames(itt.wh.mc) <- colnames(itt.wh.pc) <- colnames(itt.wh.pm) <- col.labels
colnames(itt.bh.mc) <- colnames(itt.bh.pc) <- colnames(itt.bh.pm) <- col.labels

# Stack, create output table
out2 <- rbind(itt.wb.mc, itt.wh.mc, itt.bh.mc,
              itt.wb.pc, itt.wh.pc, itt.bh.pc,
              itt.wb.pm, itt.wh.pm, itt.bh.pm)

out2 <- out2[-seq(1,33,4),]

#----- CIs -----#

df.wb.mc <- unlist(lapply(fit.wb.mc, function(x) summary(x)$df[2]))[2:4]
df.wb.pc <- unlist(lapply(fit.wb.pc, function(x) summary(x)$df[2]))[2:4]
df.wb.pm <- unlist(lapply(fit.wb.pm, function(x) summary(x)$df[2]))[2:4]

df.wh.mc <- unlist(lapply(fit.wh.mc, function(x) summary(x)$df[2]))[2:4]
df.wh.pc <- unlist(lapply(fit.wh.pc, function(x) summary(x)$df[2]))[2:4]
df.wh.pm <- unlist(lapply(fit.wh.pm, function(x) summary(x)$df[2]))[2:4]

df.bh.mc <- unlist(lapply(fit.bh.mc, function(x) summary(x)$df[2]))[2:4]
df.bh.pc <- unlist(lapply(fit.bh.pc, function(x) summary(x)$df[2]))[2:4]
df.bh.pm <- unlist(lapply(fit.bh.pm, function(x) summary(x)$df[2]))[2:4]

df.2g.10 <- c(df.wb.mc, df.wh.mc, df.bh.mc)
df.2g.20 <- c(df.wb.pc, df.wh.pc, df.bh.pc)
df.2g.21 <- c(df.wb.pm, df.wh.pm, df.bh.pm)

cv.2g.10 <- qt(p=0.025, df=df.2g.10, lower.tail=TRUE)
cv.2g.20 <- qt(p=0.025, df=df.2g.20, lower.tail=TRUE)
cv.2g.21 <- qt(p=0.025, df=df.2g.21, lower.tail=TRUE)

cv.2g <- c(cv.2g.10, cv.2g.20, cv.2g.21)
itt.2g.ci <- as.numeric(out2[,2]) + abs(cv.2g)*cbind(-as.numeric(out2[,3]), as.numeric(out2[,3]))

# without rounding
#cbind(apply(itt.2g.ci, 1, function(x) paste("[", x[1] ,", ", x[2], "]", sep="")))

# with rounding
#cbind(apply(itt.2g.ci, 1, function(x) paste("[", round(x[1],3) ,", ", round(x[2],3), "]", sep="")))

# Round and format results
for(i in 2:ncol(out2)) out2[,i] <- round(as.numeric(out2[,i]),3)
for(i in 5) out2[,i] <- paste("(",out2[,i],")",sep="")

# Assemble results
itt.2g.tab <- cbind(out2, cbind(apply(itt.2g.ci, 1, function(x) paste("[", round(x[1],3) ,", ", round(x[2],3), "]", sep=""))))
colnames(itt.2g.tab) <- c("Outcome", "Estimate", "SE", "t", "p-value", "95% CI")
# Save results
write.csv(itt.2g.tab, "out_tablea7_itt_2g_blockfe_nocovs_UNFORMATTED.csv", row.names=FALSE)

# Prep results for plotting
itt.est <- read.csv("out_tablea7_itt_2g_blockfe_nocovs_UNFORMATTED.csv", header=TRUE, stringsAsFactors=FALSE)

itt.est$lower.ci <- as.numeric(sapply(strsplit(itt.est[,6], ", "), function(x) gsub("[", "", x[1], fixed=TRUE)))
itt.est$upper.ci <- as.numeric(sapply(strsplit(itt.est[,6], ", "), function(x) gsub("]", "", x[2], fixed=TRUE)))

row_labels <- c("W-B", "W-H", "B-H")
col_labels <- c("M-C", "P-C", "P-M")

cb.itt <- itt.est[grepl("callback", itt.est[,1], fixed=TRUE),]
off.itt <- itt.est[grepl("offer", itt.est[,1], fixed=TRUE),]
meindex.itt <- itt.est[grepl("Index", itt.est[,1], fixed=TRUE),]

cb.reg <- list(coef=matrix(cb.itt$Estimate, nrow=3, ncol=3, byrow=FALSE),
               se=matrix(cb.itt$SE, nrow=3, ncol=3, byrow=FALSE),
               lower=matrix(cb.itt$lower.ci, nrow=3, ncol=3, byrow=FALSE),
               upper=matrix(cb.itt$upper.ci, nrow=3, ncol=3, byrow=FALSE))

off.reg <- list(coef=matrix(off.itt$Estimate, nrow=3, ncol=3, byrow=FALSE),
               se=matrix(off.itt$SE, nrow=3, ncol=3, byrow=FALSE),
               lower=matrix(off.itt$lower.ci, nrow=3, ncol=3, byrow=FALSE),
               upper=matrix(off.itt$upper.ci, nrow=3, ncol=3, byrow=FALSE))

meindex.reg <- list(coef=matrix(meindex.itt$Estimate, nrow=3, ncol=3, byrow=FALSE),
               se=matrix(meindex.itt$SE, nrow=3, ncol=3, byrow=FALSE),
               lower=matrix(meindex.itt$lower.ci, nrow=3, ncol=3, byrow=FALSE),
               upper=matrix(meindex.itt$upper.ci, nrow=3, ncol=3, byrow=FALSE))

#===============#
# Create basic.comparisons.table function used to generate Figure 1
#===============#

# Reports all differences and ses and ps for non parametric comparisons
# All based on t-tests

basic.comparisons.table = function(
  yroot     = "cb" ,    # Indicate root of y variable name
  row_reorder = 1:6
){
  Y.B <- dat[paste(yroot, "_A", sep="")][,1]  # outcome for blacks
  Y.H <- dat[paste(yroot, "_B", sep="")][,1]  # outcome for hispanics
  Y.W <- dat[paste(yroot, "_C", sep="")][,1]  # outcome for whites   
  W   <- dat$ipw  # Weight 
  T   <- dat$TA   # Treatment
  rnames <- c("B","H","W","B-H","W-B","W-H")  
  cnames <-   c("Control", "Monitoring", "Punitive", "M-C", "P-C", "P-M")
  
  O <- rbind(
    sapply(0:2, function(c) weighted.mean(Y.B[T==c], W[T==c])),
    sapply(0:2, function(c) weighted.mean(Y.H[T==c], W[T==c])),
    sapply(0:2, function(c) weighted.mean(Y.W[T==c], W[T==c])))
  
  O <- cbind(O, O[,2] -O[,1],  O[,3] -O[,1],  O[,3] -O[,2])
  O <- rbind(O, O[1,] -O[2,],  O[3,] -O[1,],  O[3,] -O[2,])
  colnames(O) <- cnames
  rownames(O) <- rnames
  
  
  # Standard Errors and ps and dfs and CIs: Theses are worked out in four blocks
  # Block [1,1]
  main.se <- rbind(
    sapply(0:2, function(c) wtd.var(Y.B[T==c], W[T==c])/((sum(T==c)^.5))),
    sapply(0:2, function(c) wtd.var(Y.H[T==c], W[T==c])/((sum(T==c)^.5))),
    sapply(0:2, function(c) wtd.var(Y.W[T==c], W[T==c])/((sum(T==c)^.5)))
  )
  
  main.df <- rbind(
    sapply(0:2, function(c) length(Y.B[T==c])-1),
    sapply(0:2, function(c) length(Y.H[T==c])-1),
    sapply(0:2, function(c) length(Y.W[T==c])-1))
  
  main.cv <- apply(main.df, 1, function(j) abs(qt(p=.025, df=j, lower.tail=TRUE)))
  
  main.levels <- O[1:3,1:3]
  main.lower <- main.upper <- matrix(NA, nrow=nrow(main.se), ncol=ncol(main.se))
  for(u in 1:ncol(main.se)){
    for(v in 1:nrow(main.se)){
      main.lower[v,u] <- main.levels[v,u] - main.cv[v,u]*main.se[v,u]
      main.upper[v,u] <- main.levels[v,u] + main.cv[v,u]*main.se[v,u]
    }
  }
  
  # Block [2,1] Disrimination
  discrimination.p <- rbind(
    sapply(0:2, function(c) wtd.t.test((Y.B-Y.H)[T==c], weight=W[T==c])$coefficient[3]),
    sapply(0:2, function(c) wtd.t.test((Y.W-Y.B)[T==c], weight=W[T==c])$coefficient[3]),
    sapply(0:2, function(c) wtd.t.test((Y.W-Y.H)[T==c], weight=W[T==c])$coefficient[3]))
  
  discrimination.se <- rbind(
    sapply(0:2, function(c) wtd.t.test((Y.B-Y.H)[T==c], weight=W[T==c])$additional[4]),
    sapply(0:2, function(c) wtd.t.test((Y.W-Y.B)[T==c], weight=W[T==c])$additional[4]),
    sapply(0:2, function(c) wtd.t.test((Y.W-Y.H)[T==c], weight=W[T==c])$additional[4]))
  
  discrimination.df <- rbind(
    sapply(0:2, function(c) wtd.t.test((Y.B-Y.H)[T==c], weight=W[T==c])$coefficient[2]),
    sapply(0:2, function(c) wtd.t.test((Y.W-Y.B)[T==c], weight=W[T==c])$coefficient[2]),
    sapply(0:2, function(c) wtd.t.test((Y.W-Y.H)[T==c], weight=W[T==c])$coefficient[2]))
  
  discrimination.cv <- apply(discrimination.df, 1, function(j) abs(qt(p=.025, df=j, lower.tail=TRUE)))
  
  discrimination.levels <- O[4:6,1:3]
  discrimination.lower <- discrimination.upper <- matrix(NA, nrow=nrow(discrimination.se), ncol=ncol(discrimination.se))
  for(u in 1:ncol(discrimination.se)){
    for(v in 1:nrow(discrimination.se)){
      discrimination.lower[v,u] <- discrimination.levels[v,u] - discrimination.cv[v,u]*discrimination.se[v,u]
      discrimination.upper[v,u] <- discrimination.levels[v,u] + discrimination.cv[v,u]*discrimination.se[v,u]
    }
  }
  
  # Block [1,2] Main Treatment Effects Discrimination
  get.p.se = function(Y, a,b, gets = 1) {
    # set gets = 1 for coefficient, 2 for se, 3 for t and 4 for p
    MM <- lm(Y[T==a | T==b]  ~ (T==a)[T==a | T==b], weight = W[T==a | T==b])
    coef(summary(MM))[2,gets] 
  }
  
  matrix.e <- function(gets) {rbind(
    c( get.p.se(Y.B, 0,1, gets=gets), get.p.se(Y.B, 0,2, gets=gets), get.p.se(Y.B, 1,2, gets=gets)),
    c( get.p.se(Y.H, 0,1, gets=gets), get.p.se(Y.H, 0,2, gets=gets), get.p.se(Y.H, 1,2, gets=gets)),
    c( get.p.se(Y.W, 0,1, gets=gets), get.p.se(Y.W, 0,2, gets=gets), get.p.se(Y.W, 1,2, gets=gets))
  )}
  
  effects.p <- matrix.e(4)
  effects.se <- matrix.e(2)
  
  get.df = function(Y, a, b) {
    MM <- lm(Y[T==a | T==b]  ~ (T==a)[T==a | T==b], weight = W[T==a | T==b])
    summary(MM)$df[2]
  }
  
  matrix.df <- function(x) { rbind(
    c( get.df(Y.B, 0,1), get.df(Y.B, 0,2), get.df(Y.B, 1,2)),
    c( get.df(Y.H, 0,1), get.df(Y.H, 0,2), get.df(Y.H, 1,2)),
    c( get.df(Y.W, 0,1), get.df(Y.W, 0,2), get.df(Y.W, 1,2))
  )}
  
  
  diff.df <- matrix.df()
  diff.cv <- apply(diff.df, 1, function(j) abs(qt(p=.025, df=j, lower.tail=TRUE)))
  
  diff.levels <- O[1:3,4:6]
  diff.lower <- diff.upper <- matrix(NA, nrow=nrow(effects.se), ncol=ncol(effects.se))
  for(u in 1:ncol(effects.se)){
    for(v in 1:nrow(effects.se)){
      diff.lower[v,u] <- diff.levels[v,u] - diff.cv[v,u]*effects.se[v,u]
      diff.upper[v,u] <- diff.levels[v,u] + diff.cv[v,u]*effects.se[v,u]
    }
  }
  
  
  # Block [2,2]: Discrimination Effects
  d.get.p.se = function(Yi, Yj, a,b, gets = 1) {
    # set gets = 1 for coefficient, 2 for se, 3 for t and 4 for p
    MM <- lm(Yj[T==a | T==b] - Yi[T==a | T==b]  ~ (T==a)[T==a | T==b], weight = W[T==a | T==b])
    coef(summary(MM))[2,gets] 
  }
  
  d.matrix.e <- function(gets) {rbind(
    c( d.get.p.se(Y.H, Y.B, 0,1, gets=gets), d.get.p.se(Y.H, Y.B, 0,2, gets=gets), d.get.p.se(Y.H, Y.B, 1,2, gets=gets)),
    c( d.get.p.se(Y.B, Y.W, 0,1, gets=gets), d.get.p.se(Y.B, Y.W, 0,2, gets=gets), d.get.p.se(Y.B, Y.W, 1,2, gets=gets)),
    c( d.get.p.se(Y.H, Y.W, 0,1, gets=gets), d.get.p.se(Y.H, Y.W, 0,2, gets=gets), d.get.p.se(Y.H, Y.W, 1,2, gets=gets))
  )}
  
  d.effects.p <- d.matrix.e(4)
  d.effects.se <- d.matrix.e(2)
  
  get.itt.df = function(Yi, Yj, a, b) {
    MM <- lm(Yj[T==a | T==b] - Yi[T==a | T==b]  ~ (T==a)[T==a | T==b], weight = W[T==a | T==b])
    summary(MM)$df[2]
  }
  
  matrix.itt.df <- function(x) { rbind(
    c( d.get.p.se(Y.H, Y.B, 0,1), d.get.p.se(Y.H, Y.B, 0,2), d.get.p.se(Y.H, Y.B, 1,2)),
    c( d.get.p.se(Y.B, Y.W, 0,1), d.get.p.se(Y.B, Y.W, 0,2), d.get.p.se(Y.B, Y.W, 1,2)),
    c( d.get.p.se(Y.H, Y.W, 0,1), d.get.p.se(Y.H, Y.W, 0,2), d.get.p.se(Y.H, Y.W, 1,2))
  )}
  
  
  itt.df <- matrix.df()
  itt.cv <- apply(itt.df, 1, function(j) abs(qt(p=.025, df=j, lower.tail=TRUE)))
  
  itt.levels <- O[4:6,4:6]
  itt.lower <- itt.upper <- matrix(NA, nrow=nrow(d.effects.se), ncol=ncol(d.effects.se))
  for(u in 1:ncol(d.effects.se)){
    for(v in 1:nrow(d.effects.se)){
      itt.lower[v,u] <- itt.levels[v,u] - itt.cv[v,u]*d.effects.se[v,u]
      itt.upper[v,u] <- itt.levels[v,u] + itt.cv[v,u]*d.effects.se[v,u]
    }
  }
  
  # Stitch together SE and p
  SE <- rbind(cbind(main.se, effects.se),
              cbind(discrimination.se, d.effects.se))
  
  P <- rbind(cbind(matrix(NA, 3,3), effects.p),
             cbind(discrimination.p, d.effects.p))
  colnames(SE) <- cnames; rownames(SE) <- rnames
  colnames(P) <- cnames;  rownames(P) <- rnames
  
  
  # Stitch together CIs
  
  LCI <- rbind(cbind(main.lower,diff.lower),
               cbind(discrimination.lower,itt.lower))
  UCI <- rbind(cbind(main.upper,diff.upper),
               cbind(discrimination.upper,itt.upper))
  colnames(LCI) <- colnames(UCI) <- cnames
  rownames(LCI) <- rownames(UCI) <- rnames
  
  # Reorder rows as specified
  
  O <- O[row_reorder,]
  SE <- SE[row_reorder,]
  P <- P[row_reorder,]
  LCI <- LCI[row_reorder,]
  UCI <- UCI[row_reorder,]
  
  # Return list object
  
  list(coef=O, se = SE, p =P, lower=LCI, upper=UCI)
}


# Run basic.comparisons.table to generate data to populate graphs

cb <- basic.comparisons.table(yroot="cb", row_reorder=c(3,1,2,5,6,4))
off <- basic.comparisons.table(yroot="off", row_reorder=c(3,1,2,5,6,4))
meindex <- basic.comparisons.table(yroot="meindex.impute", row_reorder=c(3,1,2,5,6,4))

```

### Code for Figure Construction

```{r figure1plot, eval=T, echo=T, warning=F}

# HELPER FUNCTIONS

#================#
# Plot function used in Figure 1
#================#

# Take matrices CB, OFF, MEINDEX
# Take matrices CB.REG, OFF.REG, MEINDEX.REG

# plot with a single set of three or two threes
plot.helper <- function(coefs1=1:3, sds1=3:5, lower1, upper1, coefs2=NA, sds2=NA, lower2, upper2, double=FALSE, xlim=NULL ){
  coefplot(coefs1, sds1, CI=2, lower.conf.bounds=lower1, upper.conf.bounds=upper1, pch.pts=21, varnames="", main = "", h.axis = FALSE, xlim = xlim, cex.pts=3, lwd=3)
  if(double) coefplot(coefs2, sds2, CI=2, lower.conf.bounds=lower2, upper.conf.bounds=upper2, add=TRUE, pch.pts=19, cex.pts=3, lwd=3)
  axis(side=1, line=2, cex.axis=1.5, padj=1)
}


itt.plot <- function(y,y2,xlim){
  for(i in 1:3) plot.helper(coefs1=rev(y$coef[1:3,i]), sds1=rev(y$se[1:3,i]), xlim=xlim[[1]], lower1=rev(y$lower[1:3,i]), upper1=rev(y$upper[1:3,i]))
  for(i in 4:6) plot.helper(coefs1=rev(y$coef[1:3,i]), sds1=rev(y$se[1:3,i]), xlim=xlim[[2]], lower1=rev(y$lower[1:3,i]), upper1=rev(y$upper[1:3,i]))
  for(i in 1:3) {
    plot.helper(coefs1=rev(y$coef[4:6,i]), sds1=rev(y$se[4:6,i]), xlim=xlim[[3]], lower1=rev(y$lower[4:6,i]), upper1=rev(y$upper[4:6,i]))
    if ( i == 1 ) {  # highlight control group
      abline(h=3, col=rgb(0,0,0,.1), lwd=40)
      abline(h=2, col=rgb(0,0,0,.1), lwd=40)
    }
  }
  for(i in 4:6) {
    plot.helper(coefs1=rev(y$coef[4:6,i]), sds1=rev(y$se[4:6,i]), 
                lower1=rev(y$lower[4:6,i]), upper1=rev(y$upper[4:6,i]),
                coefs2=rev(y2$coef[1:3,(i-3)]), sds2=rev(y2$se[1:3,(i-3)]), 
                lower2=rev(y2$lower[1:3,(i-3)]), upper2=rev(y2$upper[1:3,(i-3)]),
                double=TRUE, xlim=xlim[[4]])
    if ( i == 5 ){  # highlight P-C
      abline(h=3, col=rgb(0,0,0,.1), lwd=40)
      abline(h=2, col=rgb(0,0,0,.1), lwd=40)
    }
  }
}


# FIGURE SPECS
col_width <- 7
row_height <- 4
width_spec <- c(4, 4, rep(col_width, 3), 2, rep(col_width, 3))
height_spec <- c(2, 2, row_height, 2, row_height)
colhead.cex <- 2.7 # font size for column headings
colhead2.cex <- 2.7 # font size for treatment headings
rowsub.cex <- 3  # font size for variable labels
col_headings <- c("Control", "Monitoring", "Punitive", "Monitoring\nvs. Control", "Punitive\nvs. Control", "Punitive vs.\nMonitoring")
pdf_width <- 24
pdf_height <- 11
layout_vec <- c(0,	0, 23,	23,	23,	0,	24,	24,	24,
                0, 0, 17, 18, 19, 14, 20, 21, 22,
                25, 15, 1, 2, 3, 14, 4, 5, 6,
                0, 13, 13, 13, 13, 13, 13, 13, 13,
                26, 16, 7, 8, 9, 14, 10, 11, 12)
png_width <- 2400
png_height <- 1100

## MAKE PDF FIGURES FOR PAPER AND PNG FIGURES FOR RMD FILE

## CALLBACKS
pdf("out_figure1a_6x6_cb.pdf", width=pdf_width, height=pdf_height)
par(oma=c(5,0,0,0), mar=c(5,0,0,0))
layout(matrix(layout_vec, nrow=5, ncol=9, byrow=TRUE), widths=width_spec, heights=height_spec)
# plots
xlims <- list(c(0, .25), c(-.15, .15), c(-.15, .15), c(-.2, .2))
itt.plot(cb, cb.reg, xlims)
# dividers
frame(); abline(h=.5, lwd=2)
frame(); abline(v=.5, lwd=2)
# row labels
frame(); text(.6, .85, "White", cex=rowsub.cex); text(.6, .5, "Black", cex=rowsub.cex); text(.6, .15, "Hisp.", cex=rowsub.cex)
frame(); text(.6, .85, "W-B", cex=rowsub.cex); text(.6, .5, "W-H", cex=rowsub.cex); text(.6, .15, "B-H", cex=rowsub.cex)
# col labels
for(i in 1:6) {
  frame(); text(.5, .5, col_headings[i], cex=colhead.cex)
}
# meta col labels
frame(); text(.5, .5, "Treatment Group Means", cex=colhead.cex)
frame(); text(.5, .5, "Differences between Treatment Groups", cex=colhead.cex)
# meta row labels
frame(); text(.5, .5, "Tester Race\nGroup Means", cex=colhead.cex, srt=90)
frame(); text(.5, .5, "Intergroup\nDifferences", cex=colhead.cex, srt=90)
dev.off()

# Make PNG version for Rmd output
png("out_figure1a_6x6_cb.png", width=png_width, height=png_height)
par(oma=c(5,0,0,0), mar=c(5,0,0,0))
layout(matrix(layout_vec, nrow=5, ncol=9, byrow=TRUE), widths=width_spec, heights=height_spec)
# plots
xlims <- list(c(0, .25), c(-.15, .15), c(-.15, .15), c(-.2, .2))
itt.plot(cb, cb.reg, xlims)
# dividers
frame(); abline(h=.5, lwd=2)
frame(); abline(v=.5, lwd=2)
# row labels
frame(); text(.6, .85, "White", cex=rowsub.cex); text(.6, .5, "Black", cex=rowsub.cex); text(.6, .15, "Hisp.", cex=rowsub.cex)
frame(); text(.6, .85, "W-B", cex=rowsub.cex); text(.6, .5, "W-H", cex=rowsub.cex); text(.6, .15, "B-H", cex=rowsub.cex)
# col labels
for(i in 1:6) {
  frame(); text(.5, .5, col_headings[i], cex=colhead.cex)
}
# meta col labels
frame(); text(.5, .5, "Treatment Group Means", cex=colhead.cex)
frame(); text(.5, .5, "Differences between Treatment Groups", cex=colhead.cex)
# meta row labels
frame(); text(.5, .5, "Tester Race\nGroup Means", cex=colhead.cex, srt=90)
frame(); text(.5, .5, "Intergroup\nDifferences", cex=colhead.cex, srt=90)
dev.off()

## OFFERS
pdf("out_figure1b_6x6_off.pdf", width=pdf_width, height=pdf_height)
par(oma=c(5,0,0,0), mar=c(5,0,0,0))
layout(matrix(layout_vec, nrow=5, ncol=9, byrow=TRUE), widths=width_spec, heights=height_spec)
# plots
xlims <- list(c(0, .15), c(-.1, .1), c(-.1, .1), c(-.2, .2))
itt.plot(off, off.reg, xlims)
# dividers
frame(); abline(h=.5, lwd=2)
frame(); abline(v=.5, lwd=2)
# row labels
frame(); text(.6, .85, "White", cex=rowsub.cex); text(.6, .5, "Black", cex=rowsub.cex); text(.6, .15, "Hisp.", cex=rowsub.cex)
frame(); text(.6, .85, "W-B", cex=rowsub.cex); text(.6, .5, "W-H", cex=rowsub.cex); text(.6, .15, "B-H", cex=rowsub.cex)
# col labels
for(i in 1:6) {
  frame(); text(.5, .5, col_headings[i], cex=colhead.cex)
}
# meta col labels
frame(); text(.5, .5, "Treatment Group Means", cex=colhead.cex)
frame(); text(.5, .5, "Differences between Treatment Groups", cex=colhead.cex)
# meta row labels
frame(); text(.5, .5, "Tester Race\nGroup Means", cex=colhead.cex, srt=90)
frame(); text(.5, .5, "Intergroup\nDifferences", cex=colhead.cex, srt=90)
dev.off()

# Make PNG version for Rmd output
png("out_figure1b_6x6_off.png", width=png_width, height=png_height)
par(oma=c(5,0,0,0), mar=c(5,0,0,0))
layout(matrix(layout_vec, nrow=5, ncol=9, byrow=TRUE), widths=width_spec, heights=height_spec)
# plots
xlims <- list(c(0, .15), c(-.1, .1), c(-.1, .1), c(-.2, .2))
itt.plot(off, off.reg, xlims)
# dividers
frame(); abline(h=.5, lwd=2)
frame(); abline(v=.5, lwd=2)
# row labels
frame(); text(.6, .85, "White", cex=rowsub.cex); text(.6, .5, "Black", cex=rowsub.cex); text(.6, .15, "Hisp.", cex=rowsub.cex)
frame(); text(.6, .85, "W-B", cex=rowsub.cex); text(.6, .5, "W-H", cex=rowsub.cex); text(.6, .15, "B-H", cex=rowsub.cex)
# col labels
for(i in 1:6) {
  frame(); text(.5, .5, col_headings[i], cex=colhead.cex)
}
# meta col labels
frame(); text(.5, .5, "Treatment Group Means", cex=colhead.cex)
frame(); text(.5, .5, "Differences between Treatment Groups", cex=colhead.cex)
# meta row labels
frame(); text(.5, .5, "Tester Race\nGroup Means", cex=colhead.cex, srt=90)
frame(); text(.5, .5, "Intergroup\nDifferences", cex=colhead.cex, srt=90)
dev.off()

## INDEX
pdf("out_figurea3_6x6_index.pdf", width=pdf_width, height=pdf_height)
par(oma=c(5,0,0,0), mar=c(5,0,0,0))
layout(matrix(layout_vec, nrow=5, ncol=9, byrow=TRUE), widths=width_spec, heights=height_spec)
# plots
xlims <- list(c(-.3, .3), c(-.35, .35), c(-.35, .35), c(-.35, .35))
itt.plot(meindex, meindex.reg, xlims)
# dividers
frame(); abline(h=.5, lwd=2)
frame(); abline(v=.5, lwd=2)
# row labels
frame(); text(.6, .85, "White", cex=rowsub.cex); text(.6, .5, "Black", cex=rowsub.cex); text(.6, .15, "Hisp.", cex=rowsub.cex)
frame(); text(.6, .85, "W-B", cex=rowsub.cex); text(.6, .5, "W-H", cex=rowsub.cex); text(.6, .15, "B-H", cex=rowsub.cex)
# col labels
for(i in 1:6) {
  frame(); text(.5, .5, col_headings[i], cex=colhead.cex)
}
# meta col labels
frame(); text(.5, .5, "Treatment Group Means", cex=colhead.cex)
frame(); text(.5, .5, "Differences between Treatment Groups", cex=colhead.cex)
# meta row labels
frame(); text(.5, .5, "Tester Race\nGroup Means", cex=colhead.cex, srt=90)
frame(); text(.5, .5, "Intergroup\nDifferences", cex=colhead.cex, srt=90)
dev.off()

# Make PNG version for Rmd output
png("out_figurea3_6x6_index.png", width=2400, height=1100)
par(oma=c(5,0,0,0), mar=c(5,0,0,0))
layout(matrix(layout_vec, nrow=5, ncol=9, byrow=TRUE), widths=width_spec, heights=height_spec)
# plots
xlims <- list(c(-.3, .3), c(-.35, .35), c(-.35, .35), c(-.35, .35))
itt.plot(meindex, meindex.reg, xlims)
# dividers
frame(); abline(h=.5, lwd=2)
frame(); abline(v=.5, lwd=2)
# row labels
frame(); text(.6, .85, "White", cex=rowsub.cex); text(.6, .5, "Black", cex=rowsub.cex); text(.6, .15, "Hisp.", cex=rowsub.cex)
frame(); text(.6, .85, "W-B", cex=rowsub.cex); text(.6, .5, "W-H", cex=rowsub.cex); text(.6, .15, "B-H", cex=rowsub.cex)
# col labels
for(i in 1:6) {
  frame(); text(.5, .5, col_headings[i], cex=colhead.cex)
}
# meta col labels
frame(); text(.5, .5, "Treatment Group Means", cex=colhead.cex)
frame(); text(.5, .5, "Differences between Treatment Groups", cex=colhead.cex)
# meta row labels
frame(); text(.5, .5, "Tester Race\nGroup Means", cex=colhead.cex, srt=90)
frame(); text(.5, .5, "Intergroup\nDifferences", cex=colhead.cex, srt=90)
dev.off()

```

![**Figure 1, Panel A: Outcome 1: Net Discrimination in Receiving Callbacks**](out_figure1a_6x6_cb.png)

![**Figure 1, Panel B: Outcome 2: Net Discrimination in Receiving Offers**](out_figure1b_6x6_off.png)

## Figure 2: Posterior Densities of Treatment Effects on Discrimination

```{r figure2, echo=T, eval=T, warning=F}
##=============================================================================##
## Figure 2: Posterior Probabilities
##=============================================================================##

#### ======== FUNCTIONS =========== ####

## function to compute the bayesian analog of the lmfit
## using non-informative priors and Monte Carlo scheme
## based on N samples;
#  see http://www.r-bloggers.com/bayesian-linear-regression-analysis-without-tears-r/
bayesfit<-function(lmfit,N){
  QR<-lmfit$qr
  df.residual<-lmfit$df.residual
  R<-qr.R(QR) ## R component
  coef<-lmfit$coef
  Vb<-chol2inv(R) ## variance(unscaled)
  s2<-(t(lmfit$residuals)%*%lmfit$residuals)
  s2<-s2[1,1]/df.residual
  
  ## now to sample residual variance
  sigma<-df.residual*s2/rchisq(N,df.residual)
  coef.sim<-sapply(sigma,function(x) mvrnorm(1,coef,Vb*x))
  ret<-data.frame(t(coef.sim))
  names(ret)<-names(lmfit$coef)
  ret$sigma<-sqrt(sigma)
  ret
}

Bayes.sum<-function(x)
{
  c("mean"=mean(x),
    "se"=sd(x),
    "t"=mean(x)/sd(x),
    "median"=median(x),
    "CrI"=quantile(x,prob=0.025),
    "CrI"=quantile(x,prob=0.975)
  )
}

bayes.graph <- function(depvar, treat, weight, block = NULL, main = "Callbacks", #comparison = "Hispanic", 
                       trt_text = "Punitive", draws = 10000, xloc = 0.15, yloc = 13, textadj = 1, ylab = "", print_title = FALSE,
                       vertical = TRUE, maxline = FALSE, addlegend = FALSE, addbox = TRUE, ylim = c(0,18)) {

  lmfit <- lm(depvar ~ treat + factor(block), weights = weight)
  bf <- bayesfit(lmfit, draws)
  t(apply(bf,2,Bayes.sum))
  summary(lmfit)
  post_punitive <- bf$treat

  # graph
  dens <- density(post_punitive, bw = 0.01, from = -.17, to = .10) # to = .04 ; bw = 0.005

  left <- quantile(post_punitive, 0)
  x1 <- min(which(dens$x >= left))
  x2 <- max(which(dens$x <  0))

  pct_left <- round((diff(dens$x[1:2])*sum(dens$y[1:x2])) / (diff(dens$x[1:2])*sum(dens$y)), digits = 3) * 100 # % MOC < 0

  main_txt <- ""
  if(print_title) { main_txt <- paste0("Outcome: ", main, "\n(", trt_text, " vs. Control)") }
  plot(dens, xlab = "", ylab = ylab, main = main_txt, bty = "n", lwd = 2, xlim = c(-.15, .15), ylim = ylim, cex.main = 1.5, cex.lab = 1.2, cex.axis = 1, font = 1) #xlim = c(-.17, .10), ylim = c(0,17)
  with(dens, polygon(x=c(x[c(x1,x1:x2,x2)]), y= c(0, y[x1:x2], 0), col="gray"))
  if(maxline) abline(v = mean(post_punitive), col = "red", lty = 2, lwd = 3)
  text(x = xloc, y = yloc, paste0(pct_left, "%"), adj = c(textadj, NA), cex = 1.2)
  text(x = xloc, y = yloc-.8, "less than zero", adj = c(textadj, NA), cex = 1.2)
  if(vertical) abline(v=0)
  if(addbox) box()
}


#### ======== ANALYSES AND GRAPH RESULTS =========== ####

# CREATE PDF OUTPUT FOR MANUSCRIPT

op <- par()
pdf("out_figure2_posteriors.pdf", width = 13.5, height = 7.5)
par(mfrow = c(2,4), mar=c(2.5,5,5,0.5), family="Helvetica") 
  
  # Hispanic
  # callback
  bayes.graph(dat$ncb_wh[dat$TA!=2], (dat$TA==1)[dat$TA!=2], dat$ipw[dat$TA!=2], dat$block[dat$TA!=2], trt_text = "Monitoring", ylab = "Discrimination against Hispanics (vs. Whites)", print_title = TRUE)
  bayes.graph(dat$ncb_wh[dat$TA!=1], (dat$TA==2)[dat$TA!=1], dat$ipw[dat$TA!=1], dat$block[dat$TA!=1], print_title = TRUE)

    # offer
  bayes.graph(dat$noff_wh[dat$TA!=2], (dat$TA==1)[dat$TA!=2], dat$ipw[dat$TA!=2], dat$block[dat$TA!=2], main = "Offers", trt_text = "Monitoring", print_title = TRUE)
  bayes.graph(dat$noff_wh[dat$TA!=1], (dat$TA==2)[dat$TA!=1], dat$ipw[dat$TA!=1], dat$block[dat$TA!=1], main = "Offers", print_title = TRUE)

  # Black
  # callback
  bayes.graph(dat$ncb_wb[dat$TA!=2], (dat$TA==1)[dat$TA!=2], dat$ipw[dat$TA!=2], dat$block[dat$TA!=2], trt_text = "Monitoring", xloc = -0.15, textadj = 0, ylab = "Discrimination against Blacks (vs. Whites)")
  bayes.graph(dat$ncb_wb[dat$TA!=1], (dat$TA==2)[dat$TA!=1], dat$ipw[dat$TA!=1], dat$block[dat$TA!=1], xloc = -0.15, textadj = 0)
  
  # offer
  bayes.graph(dat$noff_wb[dat$TA!=2], (dat$TA==1)[dat$TA!=2], dat$ipw[dat$TA!=2], dat$block[dat$TA!=2], main = "Offers", trt_text = "Monitoring", xloc = -0.15, textadj = 0)
  bayes.graph(dat$noff_wb[dat$TA!=1], (dat$TA==2)[dat$TA!=1], dat$ipw[dat$TA!=1], dat$block[dat$TA!=1], main = "Offers", xloc = -0.15, textadj = 0)
  
dev.off()


# CREATE PNG OUTPUT FOR RMD OUTPUT

par(op)
png("out_figure2_posteriors.png", width = 1350, height = 750)
par(mfrow = c(2,4), mar=c(2.5,5,5,0.5), family="Helvetica")

# Hispanic
# callback
bayes.graph(dat$ncb_wh[dat$TA!=2], (dat$TA==1)[dat$TA!=2], dat$ipw[dat$TA!=2], dat$block[dat$TA!=2], trt_text = "Monitoring", ylab = "Discrimination against Hispanics (vs. whites)", print_title = TRUE)
bayes.graph(dat$ncb_wh[dat$TA!=1], (dat$TA==2)[dat$TA!=1], dat$ipw[dat$TA!=1], dat$block[dat$TA!=1], print_title = TRUE)


# offer
bayes.graph(dat$noff_wh[dat$TA!=2], (dat$TA==1)[dat$TA!=2], dat$ipw[dat$TA!=2], dat$block[dat$TA!=2], main = "Offers", trt_text = "Monitoring", print_title = TRUE)
bayes.graph(dat$noff_wh[dat$TA!=1], (dat$TA==2)[dat$TA!=1], dat$ipw[dat$TA!=1], dat$block[dat$TA!=1], main = "Offers", print_title = TRUE)

# Black
# callback
bayes.graph(dat$ncb_wb[dat$TA!=2], (dat$TA==1)[dat$TA!=2], dat$ipw[dat$TA!=2], dat$block[dat$TA!=2], trt_text = "Monitoring", xloc = -0.15, textadj = 0, ylab = "Discrimination against Blacks (vs. whites)")
bayes.graph(dat$ncb_wb[dat$TA!=1], (dat$TA==2)[dat$TA!=1], dat$ipw[dat$TA!=1], dat$block[dat$TA!=1], xloc = -0.15, textadj = 0)

# offer
bayes.graph(dat$noff_wb[dat$TA!=2], (dat$TA==1)[dat$TA!=2], dat$ipw[dat$TA!=2], dat$block[dat$TA!=2], main = "Offers", trt_text = "Monitoring", xloc = -0.15, textadj = 0)
bayes.graph(dat$noff_wb[dat$TA!=1], (dat$TA==2)[dat$TA!=1], dat$ipw[dat$TA!=1], dat$block[dat$TA!=1], main = "Offers", xloc = -0.15, textadj = 0)

dev.off()
```

![**Figure 2: Posterior Densities of Treatment Effects on Discrimination against Hispanics (top) and African Americans (bottom) Relative to Whites**](out_figure2_posteriors.png)

## Randomization Check (reported in main text, footnote 17)

```{r randcheck, eval=T, echo=T, warning=F}
set.seed(1234567)

Xs = c("frame", "partnered", "rent + m.rent", "numbr", "sqft + m.sqft","regime1","regime2","regime3",
       "nnumattr_bh", "nnumattr_wh", "nnumattr_wb", "nnumskep_bh", "nnumskep_wh", "nnumskep_wb", 
       "npctskep_bh", "npctskep_wh", "npctskep_wb", "nnumpos_bh" , "nnumpos_wh" , "nnumpos_wb" , 
       "nnumneu_bh" , "nnumneu_wh" , "nnumneu_wb" , "nnumneg_bh" , "nnumneg_wh" , "nnumneg_wb" , 
       "npctpos_bh" , "npctpos_wh" , "npctpos_wb" , "npctneu_bh" , "npctneu_wh" , "npctneu_wb" , 
       "npctneg_bh" , "npctneg_wh" , "npctneg_wb" , "nanyskep_bh", "nanyskep_wh", "nanyskep_wb", 
       "nanyneg_bh" , "nanyneg_wh" , "nanyneg_wb" , "team_gender", "broker",
       "callorder.wb", "callorder.wh", "callorder.bh",
       "incrank.wb.gt", "incrank.wh.gt", "incrank.bh.gt",
       "incrank.wb.eq", "incrank.wh.eq", "incrank.bh.eq",
       "incrank.wb.lt", "incrank.wh.lt", "incrank.bh.lt",
       "boro.brx", "boro.brk", "boro.mnh", "boro.que", "boro.stn",
       "inc.w.hi","inc.b.hi","inc.h.hi","ll.female + m.ll.female",
       llrace, llage, testerfe)
fmla = as.formula(paste0("TA ~ ", paste(Xs, collapse="+"), "+ as.factor(block)"))
mnl_fit = multinom(formula=fmla, data=dat, weight=ipw, trace=FALSE) # trace=FALSE to suppress output about convergence
kparams = length(mnl_fit$coefnames)
mnl_loglik = -(mnl_fit$AIC - 2*kparams)/2   # log likelihood from actual model

# randomization inference
ri_mat = cbind(1:nrow(dat), dat$block, dat$TA)
blocks = names(table(dat$block))

# recalculate loglik stat for each shuffled vector of assignments
niter = 1000
mnl_loglik_ri = rep(NA, niter)
fmla_ri = as.formula(paste0("z_ri ~ ", paste(Xs, collapse="+"), "+ as.factor(block)"))
for(j in 1:niter){
  shuffle_mat = list()
  for(i in 1:length(blocks)){
    block_z = ri_mat[ri_mat[,2] == blocks[i],3]
    shuffle_z = sample(block_z, length(block_z), replace=FALSE)
    shuffle_mat[[i]] = cbind(ri_mat[ri_mat[,2] == blocks[i],1:2], shuffle_z)
  }
  shuffle_mat = do.call(rbind, shuffle_mat)
  shuffle_mat = shuffle_mat[order(shuffle_mat[,1]),]
  dat$z_ri = shuffle_mat[,3]
  mnl_fit_ri = multinom(formula=fmla_ri, data=dat, weight=ipw, trace=FALSE)
  kparams = length(mnl_fit_ri$coefnames)
  mnl_loglik_ri[j] = -(mnl_fit_ri$AIC - 2*kparams)/2
}

# calculate p-value
mean(abs(mnl_loglik_ri) >= abs(mnl_loglik))
```

# Appendix Tables and Figures

## Table A1 (p. A-2): Distribution of Experimental Subjects by Randomization Block

```{r tablea1, echo=T, warning=F}
blockLabs <- rep(c("Brooklyn","Bronx","Manhattan","Queens","Staten Island","Likely Discrimination Frame"), 3)
blockLabs <- blockLabs[-length(blockLabs)]
pcts <- round(prop.table(table(dat$block, dat$TA, useNA="ifany")), 3)
block_dist <- cbind(blockLabs, 
                    table(dat$block, dat$TA, useNA="ifany")[,1], pcts[,1],
                    table(dat$block, dat$TA, useNA="ifany")[,2], pcts[,2],
                    table(dat$block, dat$TA, useNA="ifany")[,3], pcts[,3])

block_dist <- rbind(c("Regime 1: 13 Apr 2012 - 9 Sep 2012", rep(NA,6)),
                    block_dist[1:6,],
                    c("Regime 2: 10 Sep 2012 - 7 May 7, 2013", rep(NA,6)),
                    block_dist[7:12,],
                    c("Regime 3: 8 May 2013 to 20 Dec 2013", rep(NA,6)),
                    block_dist[13:17,],
                    c("Total", table(dat$TA)[1], round(table(dat$TA)[1]/sum(table(dat$TA)),3), 
                      table(dat$TA)[2], round(table(dat$TA)[2]/sum(table(dat$TA)),3), 
                      table(dat$TA)[3], round(table(dat$TA)[3]/sum(table(dat$TA)),3)
                    ))

colnames(block_dist) <- c("Block","Control (N)","Control (Proportion)",
                  "Monitoring (N)","Monitoring (Proportion)",
                  "Punitive (N)","Punitive (Proportion)")

block_distcaption = "Distribution of Experimental Subjects by Randomization Block. Cells contain counts of the number and proportion of experimental subjects randomly assigned to each arm by randomization block. The 17 randomization blocks are defined by the ad's sampling stratum (New York City borough or the likely discrimination, or LD, oversample) and by treatment regime (defined as a distinct design and randomization procedure). There are three treatment regimes. Regime 1 was a 5-arm design (2 of which are not analyzed in this paper) with equal treatment assignment probabilities. Regime 2 was a 3-arm design where the probability of assignment to control was 0.5 and the probability of assignment to the monitoring or punitive conditions was 0.25. Regime 3 was a 3-arm design with equal treatment assignment probabilities. Proportions may not sum to 1 due to rounding."

# export table to tex file
print(xtable(block_dist, caption = block_distcaption, label= "blockNs"), file="out_table_a1_dist_by_block.tex", hline.after=c(0,7,14,20,21), include.rownames=FALSE,  only.contents=TRUE)

# show table in Rmd output file
rownames(block_dist) <- NULL
kable(block_dist, caption="**Table A1**")
```

## Table A2 (p. A-3): Incidence of Early Stage Discrimination

```{r tablea2, echo=T, warning=F}

# create table shells

wb <- matrix(NA, nrow=length(es.a), ncol=16)
wh <- matrix(NA, nrow=length(es.a), ncol=16)
bh <- matrix(NA, nrow=length(es.a), ncol=16)

# populate column headings
colnames(wb) <- c("Measure", paste( c(rep("AuditSamp-",5),rep("ExpSamp-",5),rep("Control-",5)) , rep(c("Maj-Mean","Min-Mean","Diff","P","N"),3), sep=""))
colnames(wh) <- c("Measure", paste( c(rep("AuditSamp-",5),rep("ExpSamp-",5),rep("Control-",5)) , rep(c("Maj-Mean","Min-Mean","Diff","P","N"),3), sep=""))
colnames(bh) <- c("Measure", paste( c(rep("AuditSamp-",5),rep("ExpSamp-",5),rep("Control-",5)) , rep(c("Maj-Mean","Min-Mean","Diff","P","N"),3), sep=""))

# populate first column - variable names
wb[,1] <- c(es.wb.labs)
wh[,1] <- c(es.wh.labs)
bh[,1] <- c(es.bh.labs)

# check variables
# for(i in 1:length(es.a)){
#   cat("\n *********** variable : ", es.a.labs[i], "****************\n", sep="")
#   print(table(aud[[es.a[i]]], useNA="ifany"))
#   cat("\n *********** variable : ", es.b.labs[i], "****************\n", sep="")
#   print(table(aud[[es.b[i]]], useNA="ifany"))
#   cat("\n *********** variable : ", es.c.labs[i], "****************\n", sep="")
#   print(table(aud[[es.c[i]]], useNA="ifany"))
# }

# convert all vars into numeric

cols <- c(es.a, es.b, es.c, es.wb, es.wh, es.bh)
aud[,cols] <- lapply(aud[,cols], as.numeric)

# populate table shells

for(i in 1:nrow(wb)){
  # audit sample
  wb[i,2] <- mean(aud[[es.c[i]]])
  wb[i,3] <- mean(aud[[es.a[i]]])
  wb[i,4] <- as.numeric(wb[i,2]) - as.numeric(wb[i,3])
  wb[i,5] <- t.test(x=aud[[es.c[i]]], y=aud[[es.a[i]]], alternative="two.sided", paired=TRUE, conf.level=0.95)$p.value
  wb[i,6] <- length(aud[[es.c[i]]]) 
  
  # experiment sample
  wb[i,7] <- mean(aud[[es.c[i]]][aud$insamp=="1" & !is.na(aud$insamp) ])
  wb[i,8] <- mean(aud[[es.a[i]]][aud$insamp=="1" & !is.na(aud$insamp) ])
  wb[i,9] <- as.numeric(wb[i,7]) - as.numeric(wb[i,8])
  wb[i,10] <- t.test(x=aud[[es.c[i]]][aud$insamp=="1" & !is.na(aud$insamp) ], y=aud[[es.a[i]]][aud$insamp=="1" & !is.na(aud$insamp) ], alternative="two.sided", paired=TRUE, conf.level=0.95)$p.value
  wb[i,11] <- length(aud[[es.c[i]]][aud$insamp=="1" & !is.na(aud$insamp) ]) 
  
  # control group
  wb[i,12] <- mean(aud[[es.c[i]]][aud$insamp=="1"  & !is.na(aud$insamp) & aud$TA==0])
  wb[i,13] <- mean(aud[[es.a[i]]][aud$insamp=="1"  & !is.na(aud$insamp) & aud$TA==0])
  wb[i,14] <- as.numeric(wb[i,12]) - as.numeric(wb[i,13])
  wb[i,15] <- t.test(x=aud[[es.c[i]]][aud$insamp=="1" & !is.na(aud$insamp) & aud$TA==0], y=aud[[es.a[i]]][aud$insamp=="1"  & !is.na(aud$insamp) & aud$TA==0], alternative="two.sided", paired=TRUE, conf.level=0.95)$p.value
  wb[i,16] <- length(aud[[es.c[i]]][aud$insamp=="1" & !is.na(aud$insamp)  & aud$TA==0]) 
  
}


for(i in 1:nrow(wh)){
  # audit sample
  wh[i,2] <- mean(aud[[es.c[i]]])
  wh[i,3] <- mean(aud[[es.b[i]]])
  wh[i,4] <- as.numeric(wh[i,2]) - as.numeric(wh[i,3])
  wh[i,5] <- t.test(x=aud[[es.c[i]]], y=aud[[es.b[i]]], alternative="two.sided", paired=TRUE, conf.level=0.95)$p.value
  wh[i,6] <- length(aud[[es.c[i]]]) 
  
  # experiment sample
  wh[i,7] <- mean(aud[[es.c[i]]][aud$insamp=="1" & !is.na(aud$insamp) ])
  wh[i,8] <- mean(aud[[es.b[i]]][aud$insamp=="1" & !is.na(aud$insamp) ])
  wh[i,9] <- as.numeric(wh[i,7]) - as.numeric(wh[i,8])
  wh[i,10] <- t.test(x=aud[[es.c[i]]][aud$insamp=="1" & !is.na(aud$insamp) ], y=aud[[es.b[i]]][aud$insamp=="1" & !is.na(aud$insamp) ], alternative="two.sided", paired=TRUE, conf.level=0.95)$p.value
  wh[i,11] <- length(aud[[es.c[i]]][aud$insamp=="1" & !is.na(aud$insamp) ]) 
  
  # control group
  wh[i,12] <- mean(aud[[es.c[i]]][aud$insamp=="1"  & !is.na(aud$insamp) & aud$TA==0])
  wh[i,13] <- mean(aud[[es.b[i]]][aud$insamp=="1"  & !is.na(aud$insamp) & aud$TA==0])
  wh[i,14] <- as.numeric(wh[i,12]) - as.numeric(wh[i,13])
  wh[i,15] <- t.test(x=aud[[es.c[i]]][aud$insamp=="1" & !is.na(aud$insamp) & aud$TA==0], y=aud[[es.b[i]]][aud$insamp=="1"  & !is.na(aud$insamp) & aud$TA==0], alternative="two.sided", paired=TRUE, conf.level=0.95)$p.value
  wh[i,16] <- length(aud[[es.c[i]]][aud$insamp=="1" & !is.na(aud$insamp)  & aud$TA==0]) 
  
}

for(i in 1:nrow(bh)){
  # audit sample
  bh[i,2] <- mean(aud[[es.a[i]]])
  bh[i,3] <- mean(aud[[es.b[i]]])
  bh[i,4] <- as.numeric(bh[i,2]) - as.numeric(bh[i,3])
  bh[i,5] <- t.test(x=aud[[es.a[i]]], y=aud[[es.b[i]]], alternative="two.sided", paired=TRUE, conf.level=0.95)$p.value
  bh[i,6] <- length(aud[[es.a[i]]]) 
  
  # experiment sample
  bh[i,7] <- mean(aud[[es.a[i]]][aud$insamp=="1" & !is.na(aud$insamp) ])
  bh[i,8] <- mean(aud[[es.b[i]]][aud$insamp=="1" & !is.na(aud$insamp) ])
  bh[i,9] <- as.numeric(bh[i,7]) - as.numeric(bh[i,8])
  bh[i,10] <- t.test(x=aud[[es.a[i]]][aud$insamp=="1" & !is.na(aud$insamp) ], y=aud[[es.b[i]]][aud$insamp=="1" & !is.na(aud$insamp) ], alternative="two.sided", paired=TRUE, conf.level=0.95)$p.value
  bh[i,11] <- length(aud[[es.a[i]]][aud$insamp=="1" & !is.na(aud$insamp) ]) 
  
  # control group
  bh[i,12] <- mean(aud[[es.a[i]]][aud$insamp=="1"  & !is.na(aud$insamp) & aud$TA==0])
  bh[i,13] <- mean(aud[[es.b[i]]][aud$insamp=="1"  & !is.na(aud$insamp) & aud$TA==0])
  bh[i,14] <- as.numeric(bh[i,12]) - as.numeric(bh[i,13])
  bh[i,15] <- t.test(x=aud[[es.a[i]]][aud$insamp=="1" & !is.na(aud$insamp) & aud$TA==0], y=aud[[es.b[i]]][aud$insamp=="1"  & !is.na(aud$insamp) & aud$TA==0], alternative="two.sided", paired=TRUE, conf.level=0.95)$p.value
  bh[i,16] <- length(aud[[es.a[i]]][aud$insamp=="1" & !is.na(aud$insamp)  & aud$TA==0]) 
  
}

for(i in 2:ncol(wb)){
  wb[,i] <- round(as.numeric(wb[,i]), 3)
  wh[,i] <- round(as.numeric(wh[,i]), 3)
  bh[,i] <- round(as.numeric(bh[,i]), 3)
  
  if(i %in% c(5,10,15)) {
    wb[,i] <- paste("(",wb[,i],")",sep="")
    wh[,i] <- paste("(",wh[,i],")",sep="")
    bh[,i] <- paste("(",bh[,i],")",sep="")
  }
  if(i %in% c(6,11,16)) {
    wb[,i] <- paste("[",wb[,i],"]",sep="")
    wh[,i] <- paste("[",wh[,i],"]",sep="")
    bh[,i] <- paste("[",bh[,i],"]",sep="")
  }
}


es_discrim_full_table <- rbind(wb, wh, bh)

table_a2 <- es_discrim_full_table[c(1,2,14,15,27,28),1:6]
colnames(table_a2)[2:6] <- c("Majority Group Mean", "Minority Group Mean", "Difference (Maj-Min)", "p-value", "[N]")

print(xtable(table_a2, caption=c("Incidence of Early Stage Discrimination",
                            "Incidence of Early Stage Discrimination")),
      file="out_tablea2_early_stage_discrim.tex",
      include.rownames=FALSE)

kable(table_a2, caption="**Table A2**")

#### F-test for the null hypothesis that race does not matter for making any contact or for scheduling appointment

aud.rs <- melt(aud[,c("cid", "contact_A", "contact_B", "contact_C", "sched_A", "sched_B", "sched_C")], id.vars="cid")

aud.rs$ttype <- sapply(strsplit(as.character(aud.rs$variable), "_", fixed=TRUE), function(x) x[2])
aud.rs$variable <- sapply(strsplit(as.character(aud.rs$variable), "_", fixed=TRUE), function(x) x[1])
aud.rs <- dcast(aud.rs, cid + ttype ~ variable, value.var="value")

fit.contact <- lm(contact ~ ttype, data=aud.rs)
fit.sched <- lm(sched ~ ttype, data=aud.rs)

print("F-test of the null that tester race predicts whether the tester makes any contact with the landlord")
print(summary(fit.contact))

print("F-test of the null that tester race predicts whether the tester successfully schedules an appointment")
print(summary(fit.sched))

```

## Figure A2 (p. A-5): Cumulative Number of Cases Over Implementation Period

```{r figurea2png, echo=T, warning=F}
dat$casedate <- as.Date(dat$casedate, format="%Y-%m-%d")
casedate <- dat$casedate[order(dat$casedate)]

# generate .png
png("out_figurea2_cases_over_time.png", height=500, width=1200)
par(oma=c(2,0,3,0), xpd=TRUE)

plot(casedate, 1:length(casedate), xaxt="n", yaxt="n", type="l", ylim=c(0,700), xlab="", ylab="Cumulative Number of Cases", xaxs="i", yaxs="i")
axis.Date(side=1, at=seq(as.Date("2012-04-01"), as.Date("2014-01-01"), "month"), format="%m/%Y", las=2, cex.axis=0.9)
axis(side=2, at=seq(0,700,100), labels=seq(0,700,100), cex.axis=0.9, las=1)
polygon(x=c(rep(as.Date("2012-10-29"),2), rep(as.Date("2012-11-11"),2)),
        y=c(700,0,0,700),
        col=rgb(.8,.8,.8,.25),
        border=rgb(.8,.8,.8,.25))
text(x=as.Date("2012-11-11"), y=600, "Implementation Halted \nDue to Hurricane Sandy", cex=.9, pos=1)
mtext(side=1, "Date", outer=TRUE)

segments(x0=as.Date("2012-09-09"), y0=700, x1=as.Date("2012-09-09"), y1=0, lty=5, col="grey80")
segments(x0=as.Date("2013-05-07"), y0=700, x1=as.Date("2013-05-07"), y1=0, lty=5, col="grey80")

text(x=as.Date("2012-07-01"), y=840, "Regime 1: 5-Arm Design \nApril 13, 2012 to September 9, 2012 \nEqual Assignment Probabilities", cex=.9, pos=1)
text(x=as.Date("2013-01-15"), y=840, "Regime 2: 3-Arm Design \nSeptember 10, 2012 to May 7, 2013 \nPr(Control)=.5, Pr(Monitoring)=Pr(Punitive)=.25", cex=.9, pos=1)
text(x=as.Date("2013-09-01"), y=840, "Regime 3: 3-Arm Design \nMay 8, 2013 to December 20, 2013 \nEqual Assignment Probabilities", cex=.9, pos=1)

dev.off()
```

```{r figurea2pdf, echo=F, warning=F}
pdf("out_figurea2_cases_over_time.pdf", height=5, width=12)
par(oma=c(2,0,0,0), xpd=TRUE)

plot(casedate, 1:length(casedate), xaxt="n", yaxt="n", type="l", ylim=c(0,700), xlab="", ylab="Cumulative Number of Cases", xaxs="i", yaxs="i")
axis.Date(side=1, at=seq(as.Date("2012-04-01"), as.Date("2014-01-01"), "month"), format="%m/%Y", las=2, cex.axis=0.9)
axis(side=2, at=seq(0,700,100), labels=seq(0,700,100), cex.axis=0.9, las=1)
polygon(x=c(rep(as.Date("2012-10-29"),2), rep(as.Date("2012-11-11"),2)),
        y=c(700,0,0,700),
        col=rgb(.8,.8,.8,.25),
        border=rgb(.8,.8,.8,.25))
text(x=as.Date("2012-11-11"), y=600, "Implementation Halted \nDue to Hurricane Sandy", cex=.9, pos=1)
mtext(side=1, "Date", outer=TRUE)

segments(x0=as.Date("2012-09-09"), y0=700, x1=as.Date("2012-09-09"), y1=0, lty=5, col="grey80")
segments(x0=as.Date("2013-05-07"), y0=700, x1=as.Date("2013-05-07"), y1=0, lty=5, col="grey80")

text(x=as.Date("2012-07-01"), y=840, "Regime 1: 5-Arm Design \nApril 13, 2012 to September 9, 2012 \nEqual Assignment Probabilities", cex=.9, pos=1)
text(x=as.Date("2013-01-15"), y=840, "Regime 2: 3-Arm Design \nSeptember 10, 2012 to May 7, 2013 \nPr(Control)=.5, Pr(Monitoring)=Pr(Punitive)=.25", cex=.9, pos=1)
text(x=as.Date("2013-09-01"), y=840, "Regime 3: 3-Arm Design \nMay 8, 2013 to December 20, 2013 \nEqual Assignment Probabilities", cex=.9, pos=1)
dev.off()
```

![**Figure A2**](out_figurea2_cases_over_time.png)

## Figure A3 (p. A-6): Discrimination Levels and Treatment Effects using the Subjective Discrimination Index

**Note:** Analysis and figure construction code for Figure A3 is shown above in _"1.1 Figure 1: Main Results on Discrimination Levels and Treatment Effects"_

![**Figure A3: Net Discrimination in Subjective Discrimination Index**](out_figurea3_6x6_index.png)

## Figure A4 (p. A-17): Estimates of Tester Random Effects on Outcome Measures

```{r figurea4code, echo=T, eval=T, warning=F}

ctx$weekday <- as.factor(weekdays(as.Date(ctx$sfa_B4_apptdate, format="%m/%d/%y")))

anova_fig <- data.frame(outcome = c("cb","off","qualpraise","sales","posedit","posbg","prof"),
                        ylab = c("Callbacks", "Offers", "Praise", "Perceived Sales Efforts", "Positive Editorializing", "Positive Reactions", "Professionalism"),
                        title = c("Tester Random Effects on Callback Incidence", "Tester Random Effects on Offer Incidence",
                                  "Tester Random Effects on Praise re: Qualifications to Rent", "Tester Random Effects on Sales Efforts",
                                  "Tester Random Effects on Positive Editorializing", "Tester Random Effects on Positive Reactions to Background",
                                  "Tester Random Effects on Professionalism"))

# MAKE GRAPHS

for(ii in 1:nrow(anova_fig)){
  model <- lmer(paste0(anova_fig$outcome[ii], " ~ (1|tid) + (1|ttype) + callorder + weekday + partnered + numbr + anyskep + anyneg + (1|team_gender)"), data=ctx)
  model_off <- lmer(paste0(anova_fig$outcome[2], " ~ (1|tid) + (1|ttype) + callorder + weekday + partnered + numbr + anyskep + anyneg + (1|team_gender)"), data=ctx)
  ifelse(ii>=2, se <- se.ranef(model_off)[[1]][i,1], se <- se.ranef(model)[[1]][i,1])
  i <- 1:(length(ranef(model)[[1]][,1]))
  y <- ranef(model)[[1]][,1] # individual tester RE
  types <- c(rep(0,14), rep(1,19), rep(2,15))
  col <- rainbow(3)[types+1]

  # output PDF file for tex
  pdf(paste0("out_figurea4",letters[ii],"_anova_",anova_fig$outcome[ii],".pdf"), width=8)
  plot(i, y, col = col, ylim=c(-1.0,1),xlim=c(1,50),xlab="Testers", ylab=anova_fig$ylab[ii],main=anova_fig$title[ii],pch=20,bty="n")
  legend("bottom", c("Black","Hispanic","White"), pch=20, col=rainbow(3),horiz=T,bty="n")
  segments(i, y + 2*se, i, y - 2*se,  col = col)
  abline(a=0,b=0,lty=2)
  dev.off()

  # output PNG file for Rmd output
  png(paste0("out_figurea4",letters[ii],"_anova_",anova_fig$outcome[ii],".png"), width=800)
  par(0,0,0,0)
  plot(i, y, col = col, ylim=c(-1.0,1),xlim=c(1,50),xlab="Testers", ylab=anova_fig$ylab[ii],main=anova_fig$title[ii],pch=20,bty="n")
  legend("bottom", c("Black","Hispanic","White"), pch=20, col=rainbow(3),horiz=T,bty="n")
  segments(i, y + 2*se, i, y - 2*se,  col = col)
  abline(a=0,b=0,lty=2)
  dev.off()
}
```

![**Figure A4, panel a: Post-Visit Callback**](out_figurea4a_anova_cb.png){width=60%}

![**Figure A4, panel b: Post-Visit Offer**](out_figurea4b_anova_off.png){width=60%}

![**Figure A4, panel c: Positive Biographical Feedback**](out_figurea4f_anova_posbg.png){width=60%}

![**Figure A4, panel d: Positive Editorializing**](out_figurea4e_anova_posedit.png){width=60%}

![**Figure A4, panel e: Praised Rental Qualifications**](out_figurea4c_anova_qualpraise.png){width=60%}

![**Figure A4, panel f: Sales Efforts**](out_figurea4d_anova_sales.png){width=60%}

![**Figure A4, panel g: Professionalism**](out_figurea4g_anova_prof.png){width=60%}


```{r randfxmodels, echo=T, eval=T, warning=F}
### The following analysis corresponds to the analyses described in footnote 22 in the article.
ctx$weekday <- as.factor(weekdays(as.Date(ctx$sfa_B4_apptdate, format="%m/%d/%y")))
dat_tid <- dat[,c("cid","TA",names(dat)[grepl("tid",names(dat))])]
dat_tid <- cbind(dat_tid[,c("cid","TA")], t(apply(dat_tid[,3:ncol(dat_tid)], 1, function(J) names(dat_tid[,3:ncol(dat_tid)])[which(J==1)])))

dat_tid_1 <- as.data.frame(dat_tid[,c(1,2,3)], stringsAsFactors=FALSE)
dat_tid_2 <- as.data.frame(dat_tid[,c(1,2,4)], stringsAsFactors=FALSE)
dat_tid_3 <- as.data.frame(dat_tid[,c(1,2,5)], stringsAsFactors=FALSE)

dat_tid_1[,3] <- as.character(dat_tid_1[,3])
dat_tid_2[,3] <- as.character(dat_tid_2[,3])
dat_tid_3[,3] <- as.character(dat_tid_3[,3])
names(dat_tid_1)[3] <- names(dat_tid_2)[3] <- names(dat_tid_3)[3] <- "tid"
dat_tid <- rbind(dat_tid_1, dat_tid_2, dat_tid_3)
dat_tid$tid <- gsub("tid.","",dat_tid$tid,fixed=TRUE)
dat_tid$ttype <- substr(dat_tid$tid, 1, 1)

dat_sub <- dat[,c("cid","TA","team_gender","callorder","partnered","numbr",
       paste0(rep(c("anyskep","anyneg","cb","off","prof","posedit","posbg","qualpraise","sales"), 3),
              rep(c("_A","_B","_C"), each=9))
       )]

dmelt <- melt(dat_sub, id.vars=c("cid", "TA", "team_gender", "callorder", "partnered", "numbr"))
dmelt$ttype <- ifelse(grepl("_A", dmelt$variable, fixed=TRUE), "A",
                      ifelse(grepl("_B", dmelt$variable, fixed=TRUE), "B", "C"))
dmelt$variable <- gsub("_A", "", dmelt$variable, fixed=TRUE)
dmelt$variable <- gsub("_B", "", dmelt$variable, fixed=TRUE)
dmelt$variable <- gsub("_C", "", dmelt$variable, fixed=TRUE)
dat_sub2 <- dcast(dmelt, cid + TA + team_gender + callorder + partnered + numbr + ttype ~ variable, value.var="value")
dat2 <- join(dat_tid, dat_sub2, by=c("cid","TA","ttype"), type="left", match="all")

ctx_sub <- ctx[,c("cid","weekday")]
ctx_sub <- ctx_sub[!duplicated(ctx_sub$cid),]
#ctx[ctx$cid %in% ctx_sub$cid[is.na(ctx_sub$weekday)] & !is.na(ctx$weekday),c("cid","weekday")]
ctx_sub$weekday <- ifelse(ctx_sub$cid == 11108, "Wednesday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 12116,    "Friday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 12608,    "Monday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 13361,    "Monday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 15734,    "Friday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 19446,  "Thursday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 26324,    "Friday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(ctx_sub$cid == 30165,    "Friday", ctx_sub$weekday)
ctx_sub$weekday <- ifelse(is.na(ctx_sub$weekday), "None", ctx_sub$weekday)

dat3 <- join(dat2, ctx_sub, by="cid", type="left", match="all")

model.callback.0 <- lmer(cb ~ (1|tid) + (1|ttype) + callorder + weekday + 
                         partnered + numbr + anyskep + anyneg + (1|team_gender), 
                       data=dat3, subset=(TA==0))

model.offer.0 <- lmer(off ~ (1|tid) + (1|ttype) + callorder + weekday + 
                      partnered + numbr + anyskep + anyneg + (1|team_gender), 
                    data=dat3, subset=(TA==0))

model.callback.all <- lmer(cb ~ (1|tid) + (1|ttype) + callorder + weekday + 
                           partnered + numbr + anyskep + anyneg + (1|team_gender), 
                         data=dat3)

model.offer.all <- lmer(off ~ (1|tid) + (1|ttype) + callorder + weekday + 
                        partnered + numbr + anyskep + anyneg + (1|team_gender), 
                      data=dat3)

re_out <- list(model.callback.0   , 
               model.offer.0      ,
               model.callback.all ,
               model.offer.all    )

re_table <- lapply(re_out, function(J){
  x <- as.data.frame(VarCorr(J))
  x[,4] <- round(x[,4], 4)
  x <- x[,c(1,4)]
  return(x)
})

re_table <- do.call(cbind, re_table)
re_table <- re_table[,-c(3,5,7)]
re_table[,1] <- c("Estimated Variance of Varying Tester Intercepts",
                  "Estimated Variance of Varying Tester Race Intercepts",
                  "Estimated Variance of Varying Tester Team Gender Intercepts",
                  "Estimated Residual Variance")
colnames(re_table) <- c("", "Control Group, Outcome: Callback", "Control Group, Outcome: Offer",
                        "Experimental Sample, Outcome: Callback", "Experimental Sample, Outcome: Offer")

kable(re_table)
```

## Table A4 (p. A-20): Selected Characteristics of Housing Units in the Audit and Experimental Samples

We summarize selected _pre-treatment_ characteristics of housing units in the audit and experimental samples. To ensure the measures are _pre-treatment_ quantities, we summarize characteristics of housing units that are posted on and scraped from the original Craigslist ads.

```{r tablea4, echo=T, eval=T, warning=F}

# Helper functions and code to calculate descriptive statistics
stratum_labels <- c("Total", "Brooklyn", "Bronx", "Manhattan", "Queens", "Staten Island", "Likely Discrimination Frame")

calc_mean_sds <- function(df, x, label, digs=NULL) {
  
  x_mean <- c(mean(df[[x]], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brk" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brx" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "mnh" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "que" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "stn" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] != "representative" ], na.rm=TRUE))
  
  x_sd <- c(sd(df[[x]], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brk" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brx" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "mnh" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "que" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "stn" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] != "representative" ], na.rm=TRUE))
  
  if(is.null(digs)) digs <- 2
  x_mean <- sprintf( paste0("%.",digs,"f") , round(x_mean, digs) )
  x_sd <- paste0("(", sprintf( paste0("%.",digs,"f") , round(x_sd, digs) ) ,")")
  
  x <- cbind(x_mean, x_sd)
  colnames(x) <- paste0(label, c("_mean", "_sd"))
  rownames(x) <- NULL
  return(x)
}

calc_medians <- function(df, x, label, digs=NULL) {
  
  x_med <- c(median(df[[x]], na.rm=TRUE),
             median(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brk" ], na.rm=TRUE),
             median(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brx" ], na.rm=TRUE),
             median(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "mnh" ], na.rm=TRUE),
             median(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "que" ], na.rm=TRUE),
             median(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "stn" ], na.rm=TRUE),
             median(df[[x]][ df[["frame"]] != "representative" ], na.rm=TRUE))
  
  if(is.null(digs)) digs <- 2
  x_med <- sprintf( paste0("%.",digs,"f") , round(x_med, digs) )

  x <- cbind(x_med, rep("", length(x_med)))
  colnames(x) <- c(paste0(label, "_median"), "")
  rownames(x) <- NULL
  return(x)
}

calc_pct_ses <- function(df, x, label, digs=NULL) {
  
  x_prop <- c(mean(df[[x]], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brk" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brx" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "mnh" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "que" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "stn" ], na.rm=TRUE),
              mean(df[[x]][ df[["frame"]] != "representative" ], na.rm=TRUE))
  
  x_sd <- c(sd(df[[x]], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brk" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brx" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "mnh" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "que" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "stn" ], na.rm=TRUE),
            sd(df[[x]][ df[["frame"]] != "representative" ], na.rm=TRUE))
  
  x_n <- c(sum(!is.na(df[[x]])),
           sum(!is.na(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brk" ])),
           sum(!is.na(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brx" ])),
           sum(!is.na(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "mnh" ])),
           sum(!is.na(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "que" ])),
           sum(!is.na(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "stn" ])),
           sum(!is.na(df[[x]][ df[["frame"]] != "representative" ]))
           )
  
  x_se <- x_sd / sqrt(x_n)
  
  if(is.null(digs)) digs <- 2
  x_pct <- sprintf( paste0("%.",digs,"f") , round(x_prop * 100, digs) )
  x_se <- paste0("(", sprintf( paste0("%.",digs,"f") , round(x_se * 100, digs) ) ,")")
  
  x <- cbind(x_pct, x_se)
  colnames(x) <- paste0(label, c("_p", "_se"))
  rownames(x) <- NULL
  return(x)
}

calc_min_max <- function(df, x, label, digs=NULL) {
  
  x_min <- c(min(df[[x]], na.rm=TRUE),
              min(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brk" ], na.rm=TRUE),
              min(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brx" ], na.rm=TRUE),
              min(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "mnh" ], na.rm=TRUE),
              min(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "que" ], na.rm=TRUE),
              min(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "stn" ], na.rm=TRUE),
              min(df[[x]][ df[["frame"]] != "representative" ], na.rm=TRUE))
  
  x_max <- c(max(df[[x]], na.rm=TRUE),
            max(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brk" ], na.rm=TRUE),
            max(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "brx" ], na.rm=TRUE),
            max(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "mnh" ], na.rm=TRUE),
            max(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "que" ], na.rm=TRUE),
            max(df[[x]][ df[["frame"]] == "representative" & df[["boro"]] == "stn" ], na.rm=TRUE),
            max(df[[x]][ df[["frame"]] != "representative" ], na.rm=TRUE))
  
  if(is.null(digs)) digs <- 2
  x_min <- sprintf( paste0("%.",digs,"f") , round(x_min, digs) )
  x_max <- sprintf( paste0("%.",digs,"f") , round(x_max, digs) )
  
  x <- cbind(x_min, x_max)
  colnames(x) <- paste0(label, c("_min", "_max"))
  rownames(x) <- NULL
  return(x)
}

#----------------------------------------#
# Panel A: Number of units

panel_a_aud_n <- c(nrow(aud),
                   table(aud$boro[aud$frame == "representative"]),
                   nrow(aud[aud$frame != "representative",]))
panel_a_aud_p <- paste0("(", sprintf("%.2f", round(panel_a_aud_n / nrow(aud) * 100, 2))  ,")")

panel_a_exp_n <- c(nrow(dat),
                   table(dat$boro[dat$frame == "representative"]),
                   nrow(dat[dat$frame != "representative",]))
panel_a_exp_p <- paste0("(", sprintf("%.2f", round(panel_a_exp_n / nrow(dat) * 100, 2))  ,")")

panel_a_trt_n <- c(nrow(dat[dat$TA != 0,]),
                   table(dat$boro[dat$frame == "representative" & dat$TA != 0]),
                   nrow(dat[dat$frame != "representative" & dat$TA != 0,]))
panel_a_trt_p <- paste0("(", sprintf("%.2f", round(panel_a_trt_n / nrow(dat[dat$TA != 0,]) * 100, 2)) ,")")

panel_a_con_n <- c(nrow(dat[dat$TA == 0,]),
                   table(dat$boro[dat$frame == "representative" & dat$TA == 0]),
                   nrow(dat[dat$frame != "representative" & dat$TA == 0,]))
panel_a_con_p <- paste0("(", sprintf("%.2f", round(panel_a_con_n / nrow(dat[dat$TA == 0,]) * 100, 2)) ,")")

tab_a4_panel_a <- cbind(stratum_labels,
                        panel_a_aud_n,
                        panel_a_aud_p,
                        panel_a_exp_n,
                        panel_a_exp_p,
                        panel_a_trt_n,
                        panel_a_trt_p,
                        panel_a_con_n,
                        panel_a_con_p)

rownames(tab_a4_panel_a) <- NULL
colnames(tab_a4_panel_a) <- gsub("panel_a_", "", colnames(tab_a4_panel_a), fixed=TRUE)

kable(tab_a4_panel_a, col.names=c("Stratum", "Audit Sample (N)", "Audit Sample (%)",
                                  "Exp Sample (N)", "Exp Sample (%)",
                                  "Any Treatment (N)", "Any Treatment (%)",
                                  "Control (N)", "Control (%)"),
      caption="**Table A4, Panel A. Number of Units (Scraped from Craigslist).**")

#----------------------------------------#
# Panel B: Monthly asking price (mean)

tab_a4_panel_b <- cbind(stratum_labels,
                    calc_mean_sds(df=aud, x="rent", label="aud"),
                    calc_mean_sds(df=dat, x="rent", label="exp"),
                    calc_mean_sds(df=dat[dat$TA != 0,], x="rent", label="trt"),
                    calc_mean_sds(df=dat[dat$TA == 0,], x="rent", label="con"))

kable(tab_a4_panel_b, col.names=c("Stratum", "Audit Sample (Mean)", "Audit Sample (SD)",
                                  "Exp Sample (Mean)", "Exp Sample (SD)",
                                  "Any Treatment (Mean)", "Any Treatment (SD)",
                                  "Control (Mean)", "Control (SD)"),
      caption="**Table A4, Panel B. Monthly Asking Rental Price ($, Scraped from Craigslist).**")

#----------------------------------------#
# Panel C: Monthly asking price (median)

tab_a4_panel_c <- cbind(stratum_labels,
                        calc_medians(df=aud, x="rent", label="aud"),
                        calc_medians(df=dat, x="rent", label="exp"),
                        calc_medians(df=dat[dat$TA != 0,], x="rent", label="trt"),
                        calc_medians(df=dat[dat$TA == 0,], x="rent", label="con"))

kable(tab_a4_panel_c, col.names=c("Stratum", "Audit Sample (Median)", "",
                                  "Exp Sample (Median)", "",
                                  "Any Treatment (Median)", "",
                                  "Control (Median)", ""),
      caption="**Table A4, Panel C. Median Monthly Asking Rental Price ($, Scraped from Craigslist).**")

#----------------------------------------#
# Panel D: Number of bedrooms

tab_a4_panel_d <- cbind(stratum_labels,
                        calc_mean_sds(df=aud, x="numbr", label="aud"),
                        calc_mean_sds(df=dat, x="numbr", label="exp"),
                        calc_mean_sds(df=dat[dat$TA != 0,], x="numbr", label="trt"),
                        calc_mean_sds(df=dat[dat$TA == 0,], x="numbr", label="con"))

kable(tab_a4_panel_d, col.names=c("Stratum", "Audit Sample (Mean)", "Audit Sample (SD)",
                                  "Exp Sample (Mean)", "Exp Sample (SD)",
                                  "Any Treatment (Mean)", "Any Treatment (SD)",
                                  "Control (Mean)", "Control (SD)"),
      caption="**Table A4, Panel D. Number of Bedrooms (Scraped from Craigslist).**")

#----------------------------------------#
# Panel E: Square footage

tab_a4_panel_e <- cbind(stratum_labels,
                        calc_mean_sds(df=aud, x="sqft", label="aud"),
                        calc_mean_sds(df=dat, x="sqft", label="exp"),
                        calc_mean_sds(df=dat[dat$TA != 0,], x="sqft", label="trt"),
                        calc_mean_sds(df=dat[dat$TA == 0,], x="sqft", label="con"))

kable(tab_a4_panel_e, col.names=c("Stratum", "Audit Sample (Mean)", "Audit Sample (SD)",
                                  "Exp Sample (Mean)", "Exp Sample (SD)",
                                  "Any Treatment (Mean)", "Any Treatment (SD)",
                                  "Control (Mean)", "Control (SD)"),
      caption="**Table A4, Panel E. Square Footage (Scraped from Craigslist).** Standard deviation is noted as NA when only one observation in a given stratum has non-missing square footage data. Square footage information is rarely posted on Craigslist rental ads in New York City.")

#----------------------------------------#
# Panel F: Listed by broker

tab_a4_panel_f <- cbind(stratum_labels,
                        calc_pct_ses(df=aud, x="broker", label="aud"),
                        calc_pct_ses(df=dat, x="broker", label="exp"),
                        calc_pct_ses(df=dat[dat$TA != 0,], x="broker", label="trt"),
                        calc_pct_ses(df=dat[dat$TA == 0,], x="broker", label="con"))

kable(tab_a4_panel_f, col.names=c("Stratum", "Audit Sample (%)", "Audit Sample (SE)",
                                  "Exp Sample (%)", "Exp Sample (SE)",
                                  "Any Treatment (%)", "Any Treatment (SE)",
                                  "Control (%)", "Control (SE)"),
      caption="**Table A4, Panel E. Listed by Broker (Scraped from Craigslist).**")

#----------------------------------------#
# Min/Max Advertised Monthly Rental Price 
# (Reported in online SI text, page A-19)

min_max_monthly_rent <- cbind(stratum_labels,
                              calc_min_max(df=aud, x="rent", label="aud"),
                              calc_min_max(df=dat, x="rent", label="exp"),
                              calc_min_max(df=dat[dat$TA != 0,], x="rent", label="trt"),
                              calc_min_max(df=dat[dat$TA == 0,], x="rent", label="con"))

kable(min_max_monthly_rent, col.names=c("Stratum", "Audit Sample (Min)", "Audit Sample (Max)",
                                        "Exp Sample (Min)", "Exp Sample (Max)",
                                        "Any Treatment (Min)", "Any Treatment (Max)",
                                        "Control (Min)", "Control (Max)"),
      caption="**Mininum and Maximum Advertised Monthly Rental Price (Reported in online SI, page A-19)**")
```

```{r, echo=F, eval=T, warning=F}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# Format table and export to tex

#----------------------------------------#
# Stitch together Table A4, export as tex

tab_a4_panel_a <- rbind(c("Panel A. Number of Units", "N", "Percent", "N", "Percent", "N", "Percent", "N", "Percent"),
                        tab_a4_panel_a)
tab_a4_panel_b <- rbind(c("Panel B. Monthly Asking Price ($)", rep(c("Mean", "SD"), 4)),
                        tab_a4_panel_b)
tab_a4_panel_c <- rbind(c("Panel C. Median Monthly Asking Price ($)", rep(c("Median", ""), 4)),
                        tab_a4_panel_c)
tab_a4_panel_d <- rbind(c("Panel D. Number of Bedrooms", rep(c("Mean", "SD"), 4)),
                        tab_a4_panel_d)
tab_a4_panel_e <- rbind(c("Panel E. Square Footage", rep(c("Mean", "SD"), 4)),
                        tab_a4_panel_e)
tab_a4_panel_f <- rbind(c("Panel F. Listed by Broker", rep(c("Percent", "SE"), 4)),
                        tab_a4_panel_f)

colnames(tab_a4_panel_a) <- colnames(tab_a4_panel_b) <- colnames(tab_a4_panel_c) <- 
  colnames(tab_a4_panel_d) <- colnames(tab_a4_panel_e) <- colnames(tab_a4_panel_f) <- NULL

table_a4 <- rbind(tab_a4_panel_a,
                  tab_a4_panel_b,
                  tab_a4_panel_c,
                  tab_a4_panel_d,
                  tab_a4_panel_e,
                  tab_a4_panel_f)

colnames(table_a4)[1:ncol(table_a4)] <- c("",
                                          rep("Audit Sample",2),
                                          rep("Experimental Sample",2),
                                          rep("Any Treatment",2),
                                          rep("Control Group",2))

table_a4 <- print(xtable(table_a4), include.rownames=FALSE)
table_a4 <- unlist(strsplit(table_a4, "\n"))

swaps <- rbind(c("\\begin{tabular}{lllllllll}", "\\begin{tabular}{lrr|rr|rr|rr}"),
               c("V1 & Audit Sample & Audit Sample & Experimental Sample & Experimental Sample & Any Treatment & Any Treatment & Control Group & Control Group \\\\ ",
                 " & \\multicolumn{2}{c|}{I. Audit Sample} & \\multicolumn{2}{c|}{II. Experimental Sample} & \\multicolumn{2}{c|}{III. Any Treatment} & \\multicolumn{2}{c}{IV. Control Group} \\\\ "))


for(i in 1:nrow(swaps)){
  table_a4 <- gsub(swaps[i,1] , swaps[i,2] , table_a4, fixed=TRUE)
}
rm(i)

table_a4 <- c(table_a4[3:4],
              "\\resizebox{1\\textwidth}{!}{",
              table_a4[5:9], "\\cline{2-9}",
              table_a4[10:16], "\\hline",
              table_a4[17], "\\cline{2-9}",
              table_a4[18:24],"\\hline",
              table_a4[25], "\\cline{2-9}",
              table_a4[26:32],"\\hline",
              table_a4[33], "\\cline{2-9}",
              table_a4[34:40],"\\hline",
              table_a4[41], "\\cline{2-9}",
              table_a4[42:48],"\\hline",
              table_a4[49], "\\cline{2-9}",
              table_a4[50:58],
              "}",
              "\\caption{Selected Pre-Treatment Characteristics of Housing Units in the Audit and Experimental Samples. Data on housing characteristics are scraped from Craigslist ads. In cells where the standard deviation is NA (i.e., for square footage), this means that only one observation had non-missing data; these data were largely missing because square footage information is rarely included in Craigslist rental listings in New York City.} \\label{samplechar}",
              table_a4[59])

cat(table_a4, file="out_tablea4_sample_characteristics.tex", sep="\n")
```

## Table A5 (p. A-21): Distribution of Rental Units Across Boroughs, by Sample

```{r tablea5, echo=T, eval=T, warning=F}
# Note: Citywide 2011 numbers are from Table 5 in 
#  http://www.nyc.gov/html/hpd/downloads/pdf/HPD-2011-HVS-Selected-Findings-Tables.pdf
# For an archived version of this document available via the Internet Archive Wayback Machine, see
#  http://web.archive.org/web/20131102065647/http://www.nyc.gov/html/hpd/downloads/pdf/HPD-2011-HVS-Selected-Findings-Tables.pdf 
# A PDF copy of this document is also included in the replication archive

tab.city.n <- c(691178, 388022, 587313, 449108, 57013)
tab.city.pct <- c(31.81, 17.86, 27.03, 20.67, 2.62)

db.aud <- ddply(aud[aud$frame != "likely disc",], .(boro), summarise,
                num = length(cid),
                pct = round(length(cid)/nrow(aud[aud$frame != "likely disc",])*100, 2))

db.exp <- ddply(dat[dat$frame != "likely disc",], .(boro), summarise,
                num = length(cid),
                pct = round(length(cid)/nrow(dat[dat$frame != "likely disc",])*100, 2))

db.con <- ddply(dat[dat$frame != "likely disc" & dat$TA==0,], .(boro), summarise,
                num = length(cid),
                pct = round(length(cid)/nrow(dat[dat$frame != "likely disc" & dat$TA==0,])*100, 2))

colnames(db.aud) <- c("boro", "Audit Sample (N)", "Audit Sample (%)")
colnames(db.exp) <- c("boro", "Exp Sample (N)", "Exp Sample (%)")
colnames(db.con) <- c("boro", "Control Group (N)", "Control Group (%)")

tab.db <- join(db.aud, db.exp, by="boro", type="left", match="all")
tab.db <- join(tab.db, db.con, by="boro", type="left", match="all")

tab.db <- cbind(tab.db[,1],
                tab.city.n,
                tab.city.pct,
                tab.db[,2:ncol(tab.db)])

tab.db[,1] <- c("Brooklyn", "Bronx", "Manhattan", "Queens", "Staten Island")
colnames(tab.db)[1:3] <- c("Borough", "Citywide 2011 (N)", "Citywide 2011 (%)")

tab.db <- rbind(tab.db,
                c("Total", round(apply(tab.db[,2:ncol(tab.db)],2,sum), 0)))

print(xtable(tab.db), file="out_tablea5_dist_by_borough.tex", include.rownames=FALSE)

kable(tab.db, caption="**Table A5**")
```

## Figure A5 (p. A-22): Map of the Geographic Distribution of Housing Units Corresponding to Advertised Listings

We omit the replication data and code to produce Figure A5, the map of the geographic distribution of rental units in the audit and experimental samples, because these require non-anonymized data that contain personal identifying information about the subjects.

## Table A6 (p. A-24): Baseline Incidence of Discrimination: In-Person and Post-Visit

```{r tablea6, echo=T, eval=T, warning=F}

# One table shell for each comparison (white-black, white-hispanic, black-hispanic)
# 1 = one objective measure, then number of subjective indicaors, then post-visit outcomes, then one for the index measure
wb <- matrix(NA, nrow=1+nrow(net.subj.mat)+nrow(net.post.mat)+1, ncol=16)
wh <- matrix(NA, nrow=1+nrow(net.subj.mat)+nrow(net.post.mat)+1, ncol=16)
bh <- matrix(NA, nrow=1+nrow(net.subj.mat)+nrow(net.post.mat)+1, ncol=16)

# populate column headings
colnames(wb) <- c("Measure", paste( c(rep("Control-",5),rep("AnyTreat-",5),rep("ExpSamp-",5)) , rep(c("Maj-Mean","Min-Mean","Diff","P","N"),3), sep=""))
colnames(wh) <- c("Measure", paste( c(rep("Control-",5),rep("AnyTreat-",5),rep("ExpSamp-",5)) , rep(c("Maj-Mean","Min-Mean","Diff","P","N"),3), sep=""))
colnames(bh) <- c("Measure", paste( c(rep("Control-",5),rep("AnyTreat-",5),rep("ExpSamp-",5)) , rep(c("Maj-Mean","Min-Mean","Diff","P","N"),3), sep=""))

# populate first column - measure names
wb[,1] <- c(net.obj.mat.labs[1,1], net.subj.mat.labs[,1], net.post.mat.labs[,1],net.ind.mat.labs[1])
wh[,1] <- c(net.obj.mat.labs[1,2], net.subj.mat.labs[,2], net.post.mat.labs[,2],net.ind.mat.labs[2])
bh[,1] <- c(net.obj.mat.labs[1,3], net.subj.mat.labs[,3], net.post.mat.labs[,3],net.ind.mat.labs[3])

# check that all vectors are the same length
# length(outvars.a)==length(outvars.b)
# length(outvars.a)==length(outvars.c)
# length(outvars.a)==length(outvars.wb)
# length(outvars.a)==length(outvars.wh)
# length(outvars.a)==length(outvars.bh)

# fill in shell for wb comparison
for(i in 1:length(outvars.a)){
  ## CONTROL GROUP (cols 2-6)
  wb[i,2] <- mean(as.numeric(dat[[outvars.c[i]]][dat$TA==0]), na.rm=TRUE) # majority mean
  wb[i,3] <- mean(as.numeric(dat[[outvars.a[i]]][dat$TA==0]), na.rm=TRUE) # minority mean
  wb[i,4] <- as.numeric(wb[i,2]) - as.numeric(wb[i,3]) # difference
  #  print(as.numeric(wb[i,2]) - as.numeric(wb[i,3]))
  #  print(mean(as.numeric(dat[[outvars.wb[i]]][dat$insamp==1 & !is.na(dat$insamp) & dat$TA==0]), na.rm=TRUE))
  # PAIRED T-TESTS FOR NET MEASURES WITH NO MISSING DATA, NON-PAIRED OTHERWISE -- CONTROL STRUCTURE CONDITIONS ON VARIABLE
  if(i %in% c(1,7,8)){
    wb[i,5] <- t.test(x=as.numeric(dat[[outvars.c[i]]][dat$TA==0]),
                      y=as.numeric(dat[[outvars.a[i]]][dat$TA==0]),
                      alternative="two.sided",
                      paired=TRUE,
                      conf.level=0.95)$p.value # p value from t-test (two-sided)
  } else {
    wb[i,5] <- t.test(x=as.numeric(dat[[outvars.c[i]]][dat$TA==0]),
                      y=as.numeric(dat[[outvars.a[i]]][dat$TA==0]),
                      alternative="two.sided",
                      paired=FALSE,
                      conf.level=0.95)$p.value # p value from t-test (two-sided)    
  }
  
  
  wb[i,6] <- ifelse(length(dat[[outvars.c[i]]][dat$TA==0 & !is.na(dat[[outvars.c[i]]])])==length(as.numeric(dat[[outvars.a[i]]][dat$TA==0 & !is.na(dat[[outvars.a[i]]])])),
                    length(dat[[outvars.c[i]]][dat$TA==0 & !is.na(dat[[outvars.c[i]]])]),
                    length(dat[[outvars.c[i]]][dat$TA==0 & !( is.na(dat[[outvars.c[i]]]) & is.na(dat[[outvars.a[i]]]) )])  )# sample size
  
  ## ANY TREATMENT (cols 7-11)
  wb[i,7] <- mean(as.numeric(dat[[outvars.c[i]]][dat$TA %in% c(1,2)]), na.rm=TRUE)  # majority mean
  wb[i,8] <- mean(as.numeric(dat[[outvars.a[i]]][dat$TA %in% c(1,2)]), na.rm=TRUE)  # minority mean
  wb[i,9] <- as.numeric(wb[i,7]) - as.numeric(wb[i,8])  # difference
  if(i %in% c(1,7,8)){
    wb[i,10] <- t.test(x=as.numeric(dat[[outvars.c[i]]][dat$TA %in% c(1,2)]),
                       y=as.numeric(dat[[outvars.a[i]]][dat$TA %in% c(1,2)]),
                       alternative="two.sided",
                       paired=TRUE,
                       conf.level=0.95)$p.value # p value from t-test (two-sided)
  } else {
    wb[i,10] <- t.test(x=as.numeric(dat[[outvars.c[i]]][dat$TA %in% c(1,2)]),
                       y=as.numeric(dat[[outvars.a[i]]][dat$TA %in% c(1,2)]),
                       alternative="two.sided",
                       paired=FALSE,
                       conf.level=0.95)$p.value # p value from t-test (two-sided)    
  }
  wb[i,11] <- ifelse(length(dat[[outvars.c[i]]][dat$TA%in%c(1,2) & !is.na(dat[[outvars.c[i]]])])==length(as.numeric(dat[[outvars.a[i]]][dat$TA%in%c(1,2) & !is.na(dat[[outvars.a[i]]])])),
                     length(dat[[outvars.c[i]]][dat$TA%in%c(1,2) & !is.na(dat[[outvars.c[i]]])]),
                     length(dat[[outvars.c[i]]][dat$TA%in%c(1,2) & !( is.na(dat[[outvars.c[i]]]) & is.na(dat[[outvars.a[i]]]) )]) )# sample size
  
  ## EXPERIMENT SAMPLE (cols 12-16)
  wb[i,12] <- mean(as.numeric(dat[[outvars.c[i]]][dat$TA %in% c(0,1,2)]), na.rm=TRUE)  # majority mean
  wb[i,13] <- mean(as.numeric(dat[[outvars.a[i]]][dat$TA %in% c(0,1,2)]), na.rm=TRUE) # minority mean
  wb[i,14] <- as.numeric(wb[i,12]) - as.numeric(wb[i,13])  # difference
  if(i %in% c(1,7,8)){
    wb[i,15] <- t.test(x=as.numeric(dat[[outvars.c[i]]][dat$TA %in% c(0,1,2)]),
                       y=as.numeric(dat[[outvars.a[i]]][dat$TA %in% c(0,1,2)]),
                       alternative="two.sided",
                       paired=TRUE,
                       conf.level=0.95)$p.value # p value from t-test (two-sided)
  } else {
    wb[i,15] <- t.test(x=as.numeric(dat[[outvars.c[i]]][dat$TA %in% c(0,1,2)]),
                       y=as.numeric(dat[[outvars.a[i]]][dat$TA %in% c(0,1,2)]),
                       alternative="two.sided",
                       paired=FALSE,
                       conf.level=0.95)$p.value # p value from t-test (two-sided)    
  }
  wb[i,16] <-  ifelse(length(dat[[outvars.c[i]]][dat$TA%in%c(0,1,2) & !is.na(dat[[outvars.c[i]]])])==length(as.numeric(dat[[outvars.a[i]]][dat$TA%in%c(0,1,2) & !is.na(dat[[outvars.a[i]]])])),
                      length(dat[[outvars.c[i]]][dat$TA%in%c(0,1,2) & !is.na(dat[[outvars.c[i]]])]),
                      length(dat[[outvars.c[i]]][dat$TA%in%c(0,1,2) & !( is.na(dat[[outvars.c[i]]]) & is.na(dat[[outvars.a[i]]]) )])  )# sample size
}
## CONTROL GROUP
wb[nrow(wb),4] <- mean(as.numeric(dat[[net.ind.mat[1]]][dat$TA==0]), na.rm=TRUE) # difference
wb[nrow(wb),6] <- length(as.numeric(dat[[net.ind.mat[1]]][dat$TA%in%c(0) & !is.na(dat[[net.ind.mat[1]]])])) # sample size
## ANY TREATMENT
wb[nrow(wb),9] <- mean(as.numeric(dat[[net.ind.mat[1]]][dat$TA%in%c(1,2)]), na.rm=TRUE) # difference
wb[nrow(wb),11] <- length(as.numeric(dat[[net.ind.mat[1]]][dat$TA%in%c(1,2) & !is.na(dat[[net.ind.mat[1]]])])) # sample size
## EXPERIMENT SAMPLE
wb[nrow(wb),14] <- mean(as.numeric(dat[[net.ind.mat[1]]][dat$TA%in%c(0,1,2)]), na.rm=TRUE) # difference
wb[nrow(wb),16] <- length(as.numeric(dat[[net.ind.mat[1]]][dat$TA%in%c(0,1,2) & !is.na(dat[[net.ind.mat[1]]])]))  # sample size     

# fill in shell for wh comparison
for(i in 1:length(outvars.b)){
  ## CONTROL GROUP (cols 2-6)
  wh[i,2] <- mean(as.numeric(dat[[outvars.c[i]]][dat$TA==0]), na.rm=TRUE) # majority mean
  wh[i,3] <- mean(as.numeric(dat[[outvars.b[i]]][dat$TA==0]), na.rm=TRUE) # minority mean
  wh[i,4] <- as.numeric(wh[i,2]) - as.numeric(wh[i,3]) # difference
  #  print(as.numeric(wh[i,2]) - as.numeric(wh[i,3]))
  #  print(mean(as.numeric(dat[[outvars.wh[i]]][dat$insamp==1 & !is.na(dat$insamp) & dat$TA==0]), na.rm=TRUE))
  if(i %in% c(1,7,8)){
    wh[i,5] <- t.test(x=as.numeric(dat[[outvars.c[i]]][dat$TA==0]),
                      y=as.numeric(dat[[outvars.b[i]]][dat$TA==0]),
                      alternative="two.sided",
                      paired=TRUE,
                      conf.level=0.95)$p.value # p value from t-test (two-sided)
  } else {
    wh[i,5] <- t.test(x=as.numeric(dat[[outvars.c[i]]][dat$TA==0]),
                      y=as.numeric(dat[[outvars.b[i]]][dat$TA==0]),
                      alternative="two.sided",
                      paired=FALSE,
                      conf.level=0.95)$p.value # p value from t-test (two-sided)
    
  }
  wh[i,6] <- ifelse(length(dat[[outvars.c[i]]][dat$TA==0 & !is.na(dat[[outvars.c[i]]])])==length(as.numeric(dat[[outvars.b[i]]][dat$TA==0 & !is.na(dat[[outvars.b[i]]])])),
                    length(dat[[outvars.c[i]]][dat$TA==0 & !is.na(dat[[outvars.c[i]]])]),
                    length(dat[[outvars.c[i]]][dat$TA%in%c(0) & !( is.na(dat[[outvars.c[i]]]) & is.na(dat[[outvars.b[i]]]) )]) )# sample size
  
  ## ANY TREATMENT (cols 7-11)
  wh[i,7] <- mean(as.numeric(dat[[outvars.c[i]]][dat$TA %in% c(1,2)]), na.rm=TRUE)  # majority mean
  wh[i,8] <- mean(as.numeric(dat[[outvars.b[i]]][dat$TA %in% c(1,2)]), na.rm=TRUE)  # minority mean
  wh[i,9] <- as.numeric(wh[i,7]) - as.numeric(wh[i,8])  # difference
  if(i %in% c(1,7,8)){
    wh[i,10] <- t.test(x=as.numeric(dat[[outvars.c[i]]][dat$TA %in% c(1,2)]),
                       y=as.numeric(dat[[outvars.b[i]]][dat$TA %in% c(1,2)]),
                       alternative="two.sided",
                       paired=TRUE,
                       conf.level=0.95)$p.value # p value from t-test (two-sided)
  } else {
    wh[i,10] <- t.test(x=as.numeric(dat[[outvars.c[i]]][dat$TA %in% c(1,2)]),
                       y=as.numeric(dat[[outvars.b[i]]][dat$TA %in% c(1,2)]),
                       alternative="two.sided",
                       paired=FALSE,
                       conf.level=0.95)$p.value # p value from t-test (two-sided)    
  }
  wh[i,11] <- ifelse(length(dat[[outvars.c[i]]][dat$TA%in%c(1,2) & !is.na(dat[[outvars.c[i]]])])==length(as.numeric(dat[[outvars.b[i]]][dat$TA%in%c(1,2) & !is.na(dat[[outvars.b[i]]])])),
                     length(dat[[outvars.c[i]]][dat$TA%in%c(1,2) & !is.na(dat[[outvars.c[i]]])]),
                     length(dat[[outvars.c[i]]][dat$TA%in%c(1,2) & !( is.na(dat[[outvars.c[i]]]) & is.na(dat[[outvars.b[i]]]) )])  )# sample size
  
  ## EXPERIMENT SAMPLE (cols 12-16)
  wh[i,12] <- mean(as.numeric(dat[[outvars.c[i]]][dat$TA %in% c(0,1,2)]), na.rm=TRUE)  # majority mean
  wh[i,13] <- mean(as.numeric(dat[[outvars.b[i]]][dat$TA %in% c(0,1,2)]), na.rm=TRUE) # minority mean
  wh[i,14] <- as.numeric(wh[i,12]) - as.numeric(wh[i,13])  # difference
  wh[i,15] <- t.test(x=as.numeric(dat[[outvars.c[i]]][dat$TA %in% c(0,1,2)]),
                     y=as.numeric(dat[[outvars.b[i]]][dat$TA %in% c(0,1,2)]),
                     alternative="two.sided",
                     conf.level=0.95)$p.value # p value from t-test (two-sided)
  wh[i,16] <-  ifelse(length(dat[[outvars.c[i]]][dat$TA%in%c(0,1,2) & !is.na(dat[[outvars.c[i]]])])==length(as.numeric(dat[[outvars.b[i]]][dat$TA%in%c(0,1,2) & !is.na(dat[[outvars.b[i]]])])),
                      length(dat[[outvars.c[i]]][dat$TA%in%c(0,1,2) & !is.na(dat[[outvars.c[i]]])]),
                      length(dat[[outvars.c[i]]][dat$TA%in%c(0,1,2) & !( is.na(dat[[outvars.c[i]]]) & is.na(dat[[outvars.b[i]]]) )]) )# sample size
}
## CONTROL GROUP
wh[nrow(wh),4] <- mean(as.numeric(dat[[net.ind.mat[2]]][dat$TA==0]), na.rm=TRUE) # difference
wh[nrow(wh),6] <- length(as.numeric(dat[[net.ind.mat[2]]][dat$TA%in%c(0) & !is.na(dat[[net.ind.mat[2]]])])) # sample size
## ANY TREATMENT
wh[nrow(wh),9] <- mean(as.numeric(dat[[net.ind.mat[2]]][dat$TA%in%c(1,2)]), na.rm=TRUE) # difference
wh[nrow(wh),11] <- length(as.numeric(dat[[net.ind.mat[2]]][dat$TA%in%c(1,2) & !is.na(dat[[net.ind.mat[2]]])])) # sample size
## EXPERIMENT SAMPLE
wh[nrow(wh),14] <- mean(as.numeric(dat[[net.ind.mat[2]]][dat$TA%in%c(0,1,2)]), na.rm=TRUE) # difference
wh[nrow(wh),16] <- length(as.numeric(dat[[net.ind.mat[2]]][dat$TA%in%c(0,1,2) & !is.na(dat[[net.ind.mat[2]]])]))  # sample size     

# fill in shell for bh comparison
for(i in 1:length(outvars.b)){
  ## CONTROL GROUP (cols 2-6)
  bh[i,2] <- mean(as.numeric(dat[[outvars.a[i]]][dat$TA==0]), na.rm=TRUE) # majority mean
  bh[i,3] <- mean(as.numeric(dat[[outvars.b[i]]][dat$TA==0]), na.rm=TRUE) # minority mean
  bh[i,4] <- as.numeric(bh[i,2]) - as.numeric(bh[i,3]) # difference
  #  print(as.numeric(bh[i,2]) - as.numeric(bh[i,3]))
  #  print(mean(as.numeric(dat[[outvars.bh[i]]][dat$insamp==1 & !is.na(dat$insamp) & dat$TA==0]), na.rm=TRUE))
  if(i %in% c(1,7,8)){
    bh[i,5] <- t.test(x=as.numeric(dat[[outvars.a[i]]][dat$TA==0]),
                      y=as.numeric(dat[[outvars.b[i]]][dat$TA==0]),
                      alternative="two.sided",
                      paired=TRUE,
                      conf.level=0.95)$p.value # p value from t-test (two-sided)
  } else {
    bh[i,5] <- t.test(x=as.numeric(dat[[outvars.a[i]]][dat$TA==0]),
                      y=as.numeric(dat[[outvars.b[i]]][dat$TA==0]),
                      alternative="two.sided",
                      paired=FALSE,
                      conf.level=0.95)$p.value # p value from t-test (two-sided)    
  }
  bh[i,6] <- ifelse(length(dat[[outvars.a[i]]][dat$TA==0 & !is.na(dat[[outvars.a[i]]])])==length(as.numeric(dat[[outvars.b[i]]][dat$TA==0 & !is.na(dat[[outvars.b[i]]])])),
                    length(dat[[outvars.a[i]]][dat$TA==0 & !is.na(dat[[outvars.a[i]]])]),
                    length(dat[[outvars.a[i]]][dat$TA%in%c(0) & !( is.na(dat[[outvars.a[i]]]) & is.na(dat[[outvars.b[i]]]) )])  )# sample size
  
  ## ANY TREATMENT (cols 7-11)
  bh[i,7] <- mean(as.numeric(dat[[outvars.a[i]]][dat$TA %in% c(1,2)]), na.rm=TRUE)  # majority mean
  bh[i,8] <- mean(as.numeric(dat[[outvars.b[i]]][dat$TA %in% c(1,2)]), na.rm=TRUE)  # minority mean
  bh[i,9] <- as.numeric(bh[i,7]) - as.numeric(bh[i,8])  # difference
  if(i %in% c(1,7,8)){
    bh[i,10] <- t.test(x=as.numeric(dat[[outvars.a[i]]][dat$TA %in% c(1,2)]),
                       y=as.numeric(dat[[outvars.b[i]]][dat$TA %in% c(1,2)]),
                       alternative="two.sided",
                       paired=TRUE,
                       conf.level=0.95)$p.value # p value from t-test (two-sided)
  } else {
    bh[i,10] <- t.test(x=as.numeric(dat[[outvars.a[i]]][dat$TA %in% c(1,2)]),
                       y=as.numeric(dat[[outvars.b[i]]][dat$TA %in% c(1,2)]),
                       alternative="two.sided",
                       paired=FALSE,
                       conf.level=0.95)$p.value # p value from t-test (two-sided)
  }
  bh[i,11] <- ifelse(length(dat[[outvars.a[i]]][dat$TA%in%c(1,2) & !is.na(dat[[outvars.a[i]]])])==length(as.numeric(dat[[outvars.b[i]]][dat$TA%in%c(1,2) & !is.na(dat[[outvars.b[i]]])])),
                     length(dat[[outvars.a[i]]][dat$TA%in%c(1,2) & !is.na(dat[[outvars.a[i]]])]),
                     length(dat[[outvars.a[i]]][dat$TA%in%c(1,2) & !( is.na(dat[[outvars.a[i]]]) & is.na(dat[[outvars.b[i]]]) )])  )# sample size
  
  ## EXPERIMENT SAMPLE (cols 12-16)
  bh[i,12] <- mean(as.numeric(dat[[outvars.a[i]]][dat$TA %in% c(0,1,2)]), na.rm=TRUE)  # majority mean
  bh[i,13] <- mean(as.numeric(dat[[outvars.b[i]]][dat$TA %in% c(0,1,2)]), na.rm=TRUE) # minority mean
  bh[i,14] <- as.numeric(bh[i,12]) - as.numeric(bh[i,13])  # difference
  if(i %in% c(1,7,8)){
    bh[i,15] <- t.test(x=as.numeric(dat[[outvars.a[i]]][dat$TA %in% c(0,1,2)]),
                       y=as.numeric(dat[[outvars.b[i]]][dat$TA %in% c(0,1,2)]),
                       alternative="two.sided",
                       paired=TRUE,
                       conf.level=0.95)$p.value # p value from t-test (two-sided)
  } else {
    bh[i,15] <- t.test(x=as.numeric(dat[[outvars.a[i]]][dat$TA %in% c(0,1,2)]),
                       y=as.numeric(dat[[outvars.b[i]]][dat$TA %in% c(0,1,2)]),
                       alternative="two.sided",
                       paired=FALSE,
                       conf.level=0.95)$p.value # p value from t-test (two-sided)    
  }
  bh[i,16] <-  ifelse(length(dat[[outvars.a[i]]][dat$TA%in%c(0,1,2) & !is.na(dat[[outvars.a[i]]])])==length(as.numeric(dat[[outvars.b[i]]][dat$TA%in%c(0,1,2) & !is.na(dat[[outvars.b[i]]])])),
                      length(dat[[outvars.a[i]]][dat$TA%in%c(0,1,2) & !is.na(dat[[outvars.a[i]]])]),
                      length(dat[[outvars.a[i]]][dat$TA%in%c(0,1,2) & !( is.na(dat[[outvars.a[i]]]) & is.na(dat[[outvars.b[i]]]) )])  )# sample size
}
## CONTROL GROUP
bh[nrow(bh),4] <- mean(as.numeric(dat[[net.ind.mat[3]]][dat$TA==0]), na.rm=TRUE) # difference
bh[nrow(bh),6] <- length(as.numeric(dat[[net.ind.mat[3]]][dat$TA%in%c(0) & !is.na(dat[[net.ind.mat[3]]])])) # sample size
## ANY TREATMENT
bh[nrow(bh),9] <- mean(as.numeric(dat[[net.ind.mat[3]]][dat$TA%in%c(1,2)]), na.rm=TRUE) # difference
bh[nrow(bh),11] <- length(as.numeric(dat[[net.ind.mat[3]]][dat$TA%in%c(1,2) & !is.na(dat[[net.ind.mat[3]]])])) # sample size
## EXPERIMENT SAMPLE
bh[nrow(bh),14] <- mean(as.numeric(dat[[net.ind.mat[3]]][dat$TA%in%c(0,1,2)]), na.rm=TRUE) # difference
bh[nrow(bh),16] <- length(as.numeric(dat[[net.ind.mat[3]]][dat$TA%in%c(0,1,2) & !is.na(dat[[net.ind.mat[3]]])]))  # sample size     


for(i in 2:ncol(wb)){
  wb[,i] <- round(as.numeric(wb[,i]), 3)
  wh[,i] <- round(as.numeric(wh[,i]), 3)
  bh[,i] <- round(as.numeric(bh[,i]), 3)
  
  if(i %in% c(5,10,15)) {
    wb[,i] <- paste("(",wb[,i],")",sep="")
    wh[,i] <- paste("(",wh[,i],")",sep="")
    bh[,i] <- paste("(",bh[,i],")",sep="")
  }
  if(i %in% c(6,11,16)) {
    wb[,i] <- paste("[",wb[,i],"]",sep="")
    wh[,i] <- paste("[",wh[,i],"]",sep="")
    bh[,i] <- paste("[",bh[,i],"]",sep="")
  }
}



out <- rbind(wb[c(1,9,2:8),],
             wh[c(1,9,2:8),],
             bh[c(1,9,2:8),])

out <- out[-c(2,11,20),] # get rid of index measure (it makes no sense to include it because it is standardized to the control group mean)

print(xtable(out, caption=c("Baseline Incidence of Discrimination: In-Person and Post-Visit",
                            "Baseline Incidence of Discrimination: In-Person and Post-Visit")), 
      file="out_tablea6_baseline_discrim.tex",
      hline.after=c(0,8,16,24), 
      include.rownames=FALSE)

kable(out[,c(1:6)], caption="**Table A6, Panel I: Cases Assigned to Control Group**", col.names=c("Measure", "Majority Group Mean", "Minority Group Mean", "Difference (Maj-Min)", "p-value", "[N]"))
kable(out[,c(1, 7:11)], caption="**Table A6, Panel II: Cases Assigned to Any Treatment Group**", col.names=c("Measure", "Majority Group Mean", "Minority Group Mean", "Difference (Maj-Min)", "p-value", "[N]"))
kable(out[,c(1, 12:16)], caption="**Table A6, Panel III: All Cases in Experimental Sample**", col.names=c("Measure", "Majority Group Mean", "Minority Group Mean", "Difference (Maj-Min)", "p-value", "[N]"))
```

## Table A7 (p. A-25): Estimated Effects of Messaging on Net Discrimination Levels

```{r tablea7, echo=T, eval=T, warning=F}
# ---------------------------------------------------------------------- #
# DEFINE COLNAMES FOR OUTPUT TABLES

col.labels <- c("Outcome",c("Estimate","SE","t","P"))

# ---------------------------------------------------------------------- #
# MODELS WITH ONLY BLOCK FIXED EFFECTS

itt.wb.mc <- matrix(NA, nrow=length(outvars.wb), ncol=5)
itt.wb.pc <- matrix(NA, nrow=length(outvars.wb), ncol=5)
itt.wb.pm <- matrix(NA, nrow=length(outvars.wb), ncol=5)

itt.wh.mc <- matrix(NA, nrow=length(outvars.wh), ncol=5)
itt.wh.pc <- matrix(NA, nrow=length(outvars.wh), ncol=5)
itt.wh.pm <- matrix(NA, nrow=length(outvars.wh), ncol=5)

itt.bh.mc <- matrix(NA, nrow=length(outvars.bh), ncol=5)
itt.bh.pc <- matrix(NA, nrow=length(outvars.bh), ncol=5)
itt.bh.pm <- matrix(NA, nrow=length(outvars.bh), ncol=5)

fit.wb.mc <- fit.wb.pc <- fit.wb.pm <- fit.wh.mc <- fit.wh.pc <- fit.wh.pm <- fit.bh.mc <- fit.bh.pc <- fit.bh.pm <- list() # store results from ITT est w/ block FE (no other covs)

for(i in 1:length(outvars.wb)){  
  model.mc <- paste(outvars.wb[i]," ~ TA1 + as.factor(block)",sep="")
  model.pc <- paste(outvars.wb[i]," ~ TA2 + as.factor(block)",sep="")
  model.pm <- paste(outvars.wb[i]," ~ TA2 + as.factor(block)",sep="")

  fit.wb.mc[[i]] <- fit.mc <- lm(formula=model.mc, data=dat[dat$TA %in% c(0,1),], weights=ipw10)
  fit.wb.pc[[i]] <- fit.pc <- lm(formula=model.pc, data=dat[dat$TA %in% c(0,2),], weights=ipw20)
  fit.wb.pm[[i]] <- fit.pm <- lm(formula=model.pm, data=dat[dat$TA %in% c(1,2),], weights=ipw21)
  
  itt.wb.mc[i,2:5] <- summary(fit.mc)$coefficients[2,]
  itt.wb.pc[i,2:5] <- summary(fit.pc)$coefficients[2,]
  itt.wb.pm[i,2:5] <- summary(fit.pm)$coefficients[2,]
    
  itt.wb.mc[i,5] <- pt(coef(summary(fit.mc))[,3], summary(fit.mc)$df[2], lower=TRUE)[2] #one sided p
  itt.wb.pc[i,5] <- pt(coef(summary(fit.pc))[,3], summary(fit.pc)$df[2], lower=TRUE)[2] #one sided p
}

for(i in 1:length(outvars.wh)){  
  model.mc <- paste(outvars.wh[i]," ~ TA1 + as.factor(block)",sep="")
  model.pc <- paste(outvars.wh[i]," ~ TA2 + as.factor(block)",sep="")
  model.pm <- paste(outvars.wh[i]," ~ TA2 + as.factor(block)",sep="")
  
  fit.wh.mc[[i]] <- fit.mc <- lm(formula=model.mc, data=dat[dat$TA %in% c(0,1),], weights=ipw10)
  fit.wh.pc[[i]] <- fit.pc <- lm(formula=model.pc, data=dat[dat$TA %in% c(0,2),], weights=ipw20)
  fit.wh.pm[[i]] <- fit.pm <- lm(formula=model.pm, data=dat[dat$TA %in% c(1,2),], weights=ipw21)
  
  itt.wh.mc[i,2:5] <- summary(fit.mc)$coefficients[2,]
  itt.wh.pc[i,2:5] <- summary(fit.pc)$coefficients[2,]
  itt.wh.pm[i,2:5] <- summary(fit.pm)$coefficients[2,]
  
  itt.wh.mc[i,5] <- pt(coef(summary(fit.mc))[,3], summary(fit.mc)$df[2], lower=TRUE)[2] #one sided p
  itt.wh.pc[i,5] <- pt(coef(summary(fit.pc))[,3], summary(fit.pc)$df[2], lower=TRUE)[2] #one sided p
}

for(i in 1:length(outvars.bh)){  
  model.mc <- paste(outvars.bh[i]," ~ TA1 + as.factor(block)",sep="")
  model.pc <- paste(outvars.bh[i]," ~ TA2 + as.factor(block)",sep="")
  model.pm <- paste(outvars.bh[i]," ~ TA2 + as.factor(block)",sep="")
  
  fit.bh.mc[[i]] <- fit.mc <- lm(formula=model.mc, data=dat[dat$TA %in% c(0,1),], weights=ipw10)
  fit.bh.pc[[i]] <- fit.pc <- lm(formula=model.pc, data=dat[dat$TA %in% c(0,2),], weights=ipw20)
  fit.bh.pm[[i]] <- fit.pm <- lm(formula=model.pm, data=dat[dat$TA %in% c(1,2),], weights=ipw21)
  
  itt.bh.mc[i,2:5] <- summary(fit.mc)$coefficients[2,]
  itt.bh.pc[i,2:5] <- summary(fit.pc)$coefficients[2,]
  itt.bh.pm[i,2:5] <- summary(fit.pm)$coefficients[2,]
}

# Add outcome measure labels
itt.wb.mc[,1] <- outvars.wb.labs
itt.wb.pc[,1] <- outvars.wb.labs
itt.wb.pm[,1] <- outvars.wb.labs

itt.wh.mc[,1] <- outvars.wh.labs
itt.wh.pc[,1] <- outvars.wh.labs
itt.wh.pm[,1] <- outvars.wh.labs

itt.bh.mc[,1] <- outvars.bh.labs
itt.bh.pc[,1] <- outvars.bh.labs
itt.bh.pm[,1] <- outvars.bh.labs

# Add column names
colnames(itt.wb.mc) <- colnames(itt.wb.pc) <- colnames(itt.wb.pm) <- col.labels
colnames(itt.wh.mc) <- colnames(itt.wh.pc) <- colnames(itt.wh.pm) <- col.labels
colnames(itt.bh.mc) <- colnames(itt.bh.pc) <- colnames(itt.bh.pm) <- col.labels

# Stack, create output table
out2 <- rbind(itt.wb.mc, itt.wh.mc, itt.bh.mc,
              itt.wb.pc, itt.wh.pc, itt.bh.pc,
              itt.wb.pm, itt.wh.pm, itt.bh.pm)

out2 <- out2[-seq(1,33,4),]

#----- CIs -----#

df.wb.mc <- unlist(lapply(fit.wb.mc, function(x) summary(x)$df[2]))[2:4]
df.wb.pc <- unlist(lapply(fit.wb.pc, function(x) summary(x)$df[2]))[2:4]
df.wb.pm <- unlist(lapply(fit.wb.pm, function(x) summary(x)$df[2]))[2:4]

df.wh.mc <- unlist(lapply(fit.wh.mc, function(x) summary(x)$df[2]))[2:4]
df.wh.pc <- unlist(lapply(fit.wh.pc, function(x) summary(x)$df[2]))[2:4]
df.wh.pm <- unlist(lapply(fit.wh.pm, function(x) summary(x)$df[2]))[2:4]

df.bh.mc <- unlist(lapply(fit.bh.mc, function(x) summary(x)$df[2]))[2:4]
df.bh.pc <- unlist(lapply(fit.bh.pc, function(x) summary(x)$df[2]))[2:4]
df.bh.pm <- unlist(lapply(fit.bh.pm, function(x) summary(x)$df[2]))[2:4]

df.2g.10 <- c(df.wb.mc, df.wh.mc, df.bh.mc)
df.2g.20 <- c(df.wb.pc, df.wh.pc, df.bh.pc)
df.2g.21 <- c(df.wb.pm, df.wh.pm, df.bh.pm)

cv.2g.10 <- qt(p=0.025, df=df.2g.10, lower.tail=TRUE)
cv.2g.20 <- qt(p=0.025, df=df.2g.20, lower.tail=TRUE)
cv.2g.21 <- qt(p=0.025, df=df.2g.21, lower.tail=TRUE)

cv.2g <- c(cv.2g.10, cv.2g.20, cv.2g.21)
itt.2g.ci <- as.numeric(out2[,2]) + abs(cv.2g)*cbind(-as.numeric(out2[,3]), as.numeric(out2[,3]))

# without rounding
#cbind(apply(itt.2g.ci, 1, function(x) paste("[", x[1] ,", ", x[2], "]", sep="")))

# with rounding
#cbind(apply(itt.2g.ci, 1, function(x) paste("[", round(x[1],3) ,", ", round(x[2],3), "]", sep="")))

# Round and format results
for(i in 2:ncol(out2)) out2[,i] <- round(as.numeric(out2[,i]),3)
for(i in 5) out2[,i] <- paste("(",out2[,i],")",sep="")

# Assemble results

itt.2g.tab <- cbind(out2, cbind(apply(itt.2g.ci, 1, function(x) paste("[", round(x[1],3) ,", ", round(x[2],3), "]", sep=""))))
colnames(itt.2g.tab) <- c("Outcome", "Estimate", "SE", "t", "p-value", "95% CI")

# Save results
write.csv(itt.2g.tab, "out_tablea7_itt_2g_blockfe_nocovs_UNFORMATTED.csv", row.names=FALSE)

kable(itt.2g.tab[1:9,], caption="**Table A7, Panel I. Monitoring vs. Control**")
kable(itt.2g.tab[10:18,], caption="**Table A7, Panel II. Punitive vs. Control**")
kable(itt.2g.tab[19:27,], caption="**Table A7, Panel III. Punitive vs. Monitoring**")
```

```{r, echo=F, eval=T, warning=F}
# Some helper code for formatting (do not display output)
start.rows <- seq(1,27,3)
stop.rows <- start.rows + 2
Ylabs <- c("Index measure of favorable in-person interactions",
           "Received post-visit callback",
           "Received post-visit offer")
Ylabs <- rep(Ylabs, 9)

seclabs <- c("I. Monitoring vs. Control", "II. Punitive vs. Control", "III. Punitive vs. Monitoring")
sublabs <- rep(c("A. White vs. Black", "B. White vs. Hispanic", "C. Black vs. Hispanic"), 3)
```


```{r, echo=F, eval=T, warning=F}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# Format table and export to tex

# Format table
tab.out2 <- list()
seclabs.index <- 0
for(i in 1:length(start.rows)){
  if(i %in% c(1,4,7)){
    seclabs.index <- seclabs.index + 1
    tab.out2[[i]] <- rbind(c(seclabs[seclabs.index], rep(NA,5)),
                           c(sublabs[i], rep(NA,5)),
                           itt.2g.tab[start.rows[i]:stop.rows[i],])        
  } else {
    tab.out2[[i]] <- rbind(c(sublabs[i], rep(NA,5)), itt.2g.tab[start.rows[i]:stop.rows[i],])    
  }
}

tab.out2 <- do.call(rbind, tab.out2)

out <- print(xtable(tab.out2, align="llrrrrr", digits=3), include.rownames=FALSE)
out <- unlist(strsplit(out, "\n"))

out <- gsub("A. White vs. Black", "\\underline{A. White vs. Black}", out, fixed=TRUE)
out <- gsub("B. White vs. Hispanic", "\\underline{B. White vs. Hispanic}", out, fixed=TRUE)
out <- gsub("C. Black vs. Hispanic", "\\underline{C. Black vs. Hispanic}", out, fixed=TRUE)
out <- gsub("I. Monitoring vs. Control &  &  &  &  & ", "\\multicolumn{6}{c}{I. Monitoring vs. Control}", out, fixed=TRUE)
out <- gsub("II. Punitive vs. Control &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{II. Punitive vs. Control}", out, fixed=TRUE)
out <- gsub("III. Punitive vs. Monitoring &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{III. Punitive vs. Monitoring}", out, fixed=TRUE)

out <- gsub("(White vs. Black)", "", out, fixed=TRUE)
out <- gsub("(White vs. Hispanic)", "", out, fixed=TRUE)
out <- gsub("(Black vs. Hispanic)", "", out, fixed=TRUE)

out <- c(out[3:4],
         "\\resizebox{.85\\textwidth}{!}{",
         out[5:49],
         "}",
         "\\caption[Estimated Effects of Messaging on Net Discrimination Levels]{Estimated Effects of Messaging on Net Discrimination Levels. Cells contain ITT estimates from OLS models with inverse probability weights and block fixed effects. For each reference group versus comparison group pairing, outcomes are net discrimination measures against the comparison group relative to the reference group. Estimated effects that are positive (negative) are interpreted as increases (decreases) in net discrimination against the comparison group relative to the reference group. Estimated p-values are reported in parentheses; p-values correspond to a one-sided test of the null hypothesis of equality of means for the monitoring-control and punitive-control comparisons, and to a two-sided test of the null hypothesis of equality of means for the punitive-monitoring comparison and for all analyses involving net discrimination against Hispanic (vs. black) testers.}",
         "\\label{ittnocov}",
         out[50])

cat(out, file="out_tablea7_itt_2g_blockfe_nocovs.tex", sep="\n")
write.csv(tab.out2, file="out_tablea7_itt_2g_blockfe_nocovs.csv", row.names=FALSE)
```

```{r prepittestforplot, eval=T, echo=F, warning=F}
# Prepare ITT estimates for plot
# extract info from main ITT estimates (2-group parametric w/ block FE and ipw, no addl covariates) to populate graph
itt.est <- read.csv("out_tablea7_itt_2g_blockfe_nocovs_UNFORMATTED.csv", header=TRUE, stringsAsFactors=FALSE)

itt.est$lower.ci <- as.numeric(sapply(strsplit(itt.est[,6], ", "), function(x) gsub("[", "", x[1], fixed=TRUE)))
itt.est$upper.ci <- as.numeric(sapply(strsplit(itt.est[,6], ", "), function(x) gsub("]", "", x[2], fixed=TRUE)))

row_labels <- c("W-B", "W-H", "B-H")
col_labels <- c("M-C", "P-C", "P-M")

cb.itt <- itt.est[grepl("callback", itt.est[,1], fixed=TRUE),]
off.itt <- itt.est[grepl("offer", itt.est[,1], fixed=TRUE),]
meindex.itt <- itt.est[grepl("Index", itt.est[,1], fixed=TRUE),]

cb.reg <- list(coef=matrix(cb.itt$Estimate, nrow=3, ncol=3, byrow=FALSE),
               se=matrix(cb.itt$SE, nrow=3, ncol=3, byrow=FALSE),
               lower=matrix(cb.itt$lower.ci, nrow=3, ncol=3, byrow=FALSE),
               upper=matrix(cb.itt$upper.ci, nrow=3, ncol=3, byrow=FALSE))

off.reg <- list(coef=matrix(off.itt$Estimate, nrow=3, ncol=3, byrow=FALSE),
                se=matrix(off.itt$SE, nrow=3, ncol=3, byrow=FALSE),
                lower=matrix(off.itt$lower.ci, nrow=3, ncol=3, byrow=FALSE),
                upper=matrix(off.itt$upper.ci, nrow=3, ncol=3, byrow=FALSE))

meindex.reg <- list(coef=matrix(meindex.itt$Estimate, nrow=3, ncol=3, byrow=FALSE),
                    se=matrix(meindex.itt$SE, nrow=3, ncol=3, byrow=FALSE),
                    lower=matrix(meindex.itt$lower.ci, nrow=3, ncol=3, byrow=FALSE),
                    upper=matrix(meindex.itt$upper.ci, nrow=3, ncol=3, byrow=FALSE))

cv.mat <- cbind(c(rep(c("index", "cb", "off"), times=3)),
      c(rep("wb", 3), rep("wh", 3), rep("bh", 3)),
      abs(cv.2g.10),
      abs(cv.2g.20),
      abs(cv.2g.21))

cv.reg.meindex <- cv.mat[cv.mat[,1]=="index",3:5]
cv.reg.cb <- cv.mat[cv.mat[,1]=="cb",3:5]
cv.reg.off <- cv.mat[cv.mat[,1]=="off",3:5]
```

## Table A8 (p. A-26): Unweighted ITT Estimates of Messaging on Net Discrimination in Receiving a Post-Visit Callback

```{r tablea8, echo=T, warning=F}
# top left
cb1 <- ddply(dat, .(TA), summarise,
              mean.cb.w = mean(cb_C),
              mean.cb.b = mean(cb_A),
              mean.cb.h = mean(cb_B))
cb1 <- t(cb1[,2:4])
colnames(cb1) <- c("Control","Monitoring","Punitive")

# bottom left
cb2 <- ddply(dat, .(TA), summarise,
             mean.ncb.wb = mean(ncb_wb),
             mean.ncb.wh = mean(ncb_wh),
             mean.ncb.bh = mean(ncb_bh))
cb2 <- t(cb2[,2:4])

# top right
cb3 <- cbind(cb1[,2]-cb1[,1], cb1[,3]-cb1[,1], cb1[,3]-cb1[,2])

# bottom right
cb4 <- cbind(cb2[,2]-cb2[,1], cb2[,3]-cb2[,1], cb2[,3]-cb2[,2])

cb1 <- round(cb1,3)
cb2 <- round(cb2,3)
cb3 <- round(cb3,3)
cb4 <- round(cb4,3)

# p values
get_p <- function(var1, var2, T_r, T_c){
  ps <- c(t.test(dat[dat$TA==0, var1],
                 dat[dat$TA==0, var2], 
                 alternative="two.sided",
                 paired=TRUE,
                 conf.level=0.95)$p.value,
          t.test(dat[dat$TA==1, var1],
                 dat[dat$TA==1, var2],
                 alternative="two.sided",
                 paired=TRUE,
                 conf.level=0.95)$p.value,
          t.test(dat[dat$TA==2, var1],
                 dat[dat$TA==2, var2], 
                 alternative="two.sided",
                 paired=TRUE,
                 conf.level=0.95)$p.value)
  return(ps)
  }

p2 <- rbind(get_p("cb_C","cb_A"),
            get_p("cb_C","cb_B"),
            get_p("cb_A","cb_B"))

p2 <- matrix(paste("(",round(p2,3),")",sep=""), ncol=3, byrow=FALSE)

#p-value within race across treatments
get_p2 <- function(var){
  ps <- c(t.test(dat[dat$TA==1, var],
                 dat[dat$TA==0, var], 
                 alternative="two.sided",
                 conf.level=0.95)$p.value,
          t.test(dat[dat$TA==2, var],
                 dat[dat$TA==0, var],
                 alternative="two.sided",
                 conf.level=0.95)$p.value,
          t.test(dat[dat$TA==2, var],
                 dat[dat$TA==1, var], 
                 alternative="two.sided",
                 conf.level=0.95)$p.value)
  return(ps)
}

p3 <- rbind(get_p2("cb_C"), get_p2("cb_A"), get_p2("cb_B"))
p3 <- matrix(paste("(",round(p3,3),")",sep=""), ncol=3, byrow=FALSE)

f0 <- lm(ncb_wb ~ TA1 + TA2, data=dat)
f1 <- lm(ncb_wb ~ TA0 + TA2, data=dat)
p4.wb <- c(pt(q=summary(f0)$coefficients[,3], df=f0$df.residual, lower.tail=TRUE)[2:3],
           summary(f1)$coefficients[3,4])

f0 <- lm(ncb_wh ~ TA1 + TA2, data=dat)
f1 <- lm(ncb_wh ~ TA0 + TA2, data=dat)
p4.wh <- c(pt(q=summary(f0)$coefficients[,3], df=f0$df.residual, lower.tail=TRUE)[2:3],
           summary(f1)$coefficients[3,4])

f0 <- lm(ncb_bh ~ TA1 + TA2, data=dat)
f1 <- lm(ncb_bh ~ TA0 + TA2, data=dat)
p4.bh <- c(summary(f0)$coefficients[2:3,4],
           summary(f1)$coefficients[3,4])

p4 <- rbind(p4.wb, p4.wh, p4.bh)
p4 <- matrix(paste("(",round(p4,3),")",sep=""), ncol=3, byrow=FALSE)

p1 <- matrix(NA, ncol=3, nrow=3)

cb1 <- rbind(cb1[1,], p1[1,],
             cb1[2,], p1[2,],
             cb1[3,], p1[3,])
cb2 <- rbind(cb2[1,], p2[1,],
             cb2[2,], p2[2,],
             cb2[3,], p2[3,])
cb3 <- rbind(cb3[1,], p3[1,],
             cb3[2,], p3[2,],
             cb3[3,], p3[3,])
cb4 <- rbind(cb4[1,], p4[1,],
             cb4[2,], p4[2,],
             cb4[3,], p4[3,])

table_a8 <- cbind(c("White",NA,"Black",NA,"Hispanic",NA,"White vs. Black",NA,"White vs. Hispanic",NA,"Black vs. Hispanic",NA),
             rbind(cbind(cb1, cb3), cbind(cb2, cb4)))
table_a8 <- rbind(table_a8, 
                  c("Sample size", nrow(dat[dat$TA == 0,]), nrow(dat[dat$TA == 1,]), 
                       nrow(dat[dat$TA == 2,]), nrow(dat[dat$TA %in% c(0,1),]), 
                       nrow(dat[dat$TA %in% c(0,2),]), nrow(dat[dat$TA %in% c(1,2),])))

colnames(table_a8) <- c("","Control", "Monitoring", "Punitive", "Monitoring vs Control", "Punitive vs Control", "Punitive vs Monitoring")

print(xtable(table_a8), file="out_tablea8_unweighted_itt_callbacks.tex", include.rownames=FALSE)
kable(table_a8[1:6,], caption="**Table A8, Panel A. Percent Favorable**")
kable(table_a8[7:12,], caption="**Table A8, Panel B. Net Discrimination (% Majority Favorable - % Minority Favorable)**")
kable(rbind(table_a8[13,]), caption="**Table A8. Sample Size (last row)**")
```

## Table A9 (p. A-27): Unweighted ITT Estimates of Messaging on Net Discrimination in Receiving a Post-Visit Offer for the Unit

```{r tablea9, echo=T, warning=F}
# top left
off1 <- ddply(dat, .(TA), summarise,
              mean.off.w = mean(off_C),
              mean.off.b = mean(off_A),
              mean.off.h = mean(off_B))
off1 <- t(off1[,2:4])
colnames(off1) <- c("Control","Monitoring","Punitive")

# bottom left
off2 <- ddply(dat, .(TA), summarise,
              mean.noff.wb = mean(noff_wb),
              mean.noff.wh = mean(noff_wh),
              mean.noff.bh = mean(noff_bh))
off2 <- t(off2[,2:4])

# top right
off3 <- cbind(off1[,2]-off1[,1], off1[,3]-off1[,1], off1[,3]-off1[,2])

# bottom right
off4 <- cbind(off2[,2]-off2[,1], off2[,3]-off2[,1], off2[,3]-off2[,2])

off1 <- round(off1,3)
off2 <- round(off2,3)
off3 <- round(off3,3)
off4 <- round(off4,3)

# p values

p2 <- rbind(get_p("off_C","off_A"),
            get_p("off_C","off_B"),
            get_p("off_A","off_B"))

p2 <- matrix(paste("(",round(p2,3),")",sep=""), ncol=3, byrow=FALSE)

p3 <- rbind(get_p2("off_C"),
            get_p2("off_A"),
            get_p2("off_B"))

p3 <- matrix(paste("(",round(p3,3),")",sep=""), ncol=3, byrow=FALSE)

f0 <- lm(noff_wb ~ TA1 + TA2, data=dat)
f1 <- lm(noff_wb ~ TA0 + TA2, data=dat)
p4.wb <- c(pt(q=summary(f0)$coefficients[,3], df=f0$df.residual, lower.tail=TRUE)[2:3],
           summary(f1)$coefficients[3,4])

f0 <- lm(noff_wh ~ TA1 + TA2, data=dat)
f1 <- lm(noff_wh ~ TA0 + TA2, data=dat)
p4.wh <- c(pt(q=summary(f0)$coefficients[,3], df=f0$df.residual, lower.tail=TRUE)[2:3],
           summary(f1)$coefficients[3,4])

f0 <- lm(noff_bh ~ TA1 + TA2, data=dat)
f1 <- lm(noff_bh ~ TA0 + TA2, data=dat)
p4.bh <- c(summary(f0)$coefficients[2:3,4],
           summary(f1)$coefficients[3,4])

p4 <- rbind(p4.wb, p4.wh, p4.bh)
p4 <- matrix(paste("(",round(p4,3),")",sep=""), ncol=3, byrow=FALSE)

p1 <- matrix(NA, ncol=3, nrow=3)

off1 <- rbind(off1[1,], p1[1,],
              off1[2,], p1[2,],
              off1[3,], p1[3,])
off2 <- rbind(off2[1,], p2[1,],
              off2[2,], p2[2,],
              off2[3,], p2[3,])
off3 <- rbind(off3[1,], p3[1,],
              off3[2,], p3[2,],
              off3[3,], p3[3,])
off4 <- rbind(off4[1,], p4[1,],
              off4[2,], p4[2,],
              off4[3,], p4[3,])

table_a9 <- cbind(c("White",NA,"Black",NA,"Hispanic",NA,"White vs. Black",NA,"White vs. Hispanic",NA,"Black vs. Hispanic",NA),
             rbind(cbind(off1, off3), cbind(off2, off4)))
table_a9 <- rbind(table_a9, 
                  c("Sample size", nrow(dat[dat$TA == 0,]), nrow(dat[dat$TA == 1,]), 
                       nrow(dat[dat$TA == 2,]), nrow(dat[dat$TA %in% c(0,1),]), 
                       nrow(dat[dat$TA %in% c(0,2),]), nrow(dat[dat$TA %in% c(1,2),])))
colnames(table_a9) <- c("","Control", "Monitoring", "Punitive", "Monitoring vs Control", "Punitive vs Control", "Punitive vs Monitoring")

print(xtable(table_a9), file="out_tablea9_unweighted_itt_offers.tex", include.rownames=FALSE)
kable(table_a9[1:6,], caption="**Table A9, Panel A. Percent Favorable**")
kable(table_a9[7:12,], caption="**Table A9, Panel B. Net Discrimination (% Majority Favorable - % Minority Favorable)**")
kable(rbind(table_a9[13,]), caption="**Table A9. Sample Size (last row)**")
```

## Table A10 (p. A-28): Sensitivity Analysis: Estimated Effects of Messaging on Net Discrimination Levels (Three-Group Parametric Estimator)

```{r tablea10, echo=T, eval=T, warning=F}
  #one-sided p function
  get_plower <- function(x) pt(summary(x)$coefficients[,3], summary(x)$df[2], lower = TRUE)
  
  # select all outcome variables
  outpatt <- c("^index","ncb","noff")
  outvar <- unlist(lapply(outpatt, function(y) grep(y, names(dat), value = TRUE)))
  
  datm <- reshape2::melt(dat, id = names(dat)[!names(dat)%in% c(outvar)],
                         variable.name = "outcome", 
                         value.name = "outcome_val")

  # control as comparison

  # monitoring vs. control
  dat_out_c <- datm %>% 
    group_by(outcome) %>%
    do(fit = lm(outcome_val ~ TA1 + TA2 + as.factor(block), weights=ipw, data=.))

  dat_outcome_coef <- tidy(dat_out_c, fit)

  #one-sided p-value
  dat_out_c$p_lower <- lapply(dat_out_c$fit, get_plower)
  p_lower <- tidy(dat_out_c, p_lower) %>% dplyr::rename(term = names) %>% dplyr::rename(p.lower = x)

  dat_coef <- left_join(dat_outcome_coef, p_lower, by = c("outcome","term"))

  # replace with one-sided p.value for all but B-H comparison
  dat_coef$p.value[grep("bh$", dat_coef$outcome, invert = T)] <- dat_coef$p.lower[grep("bh$", dat_coef$outcome, invert = T)]

  itt3g_mc <- dat_coef[dat_coef$term == "TA1",] %>% dplyr::select(-p.lower, -term)
  itt3g_mc$df <- unlist(lapply(dat_out_c$fit, function(x) summary(x)$df[2]))

  # punitive vs. control
  itt3g_pc <- dat_coef[dat_coef$term == "TA2",] %>% dplyr::select(-p.lower, -term)
  itt3g_pc$df <- unlist(lapply(dat_out_c$fit, function(x) summary(x)$df[2]))
  
  # monitoring as comparison

  # punitive vs monitoring
  dat_out_m <- datm %>%
    group_by(outcome) %>%
    do(fit = lm(outcome_val ~ TA0 + TA2 + as.factor(block), weights=ipw, data=.))

  dat_outcome_coef <- tidy(dat_out_m, fit)

  #one-sided p-value
  dat_out_m$p_lower <- lapply(dat_out_m$fit, get_plower)
  p_lower <- tidy(dat_out_m, p_lower) %>% rename(term = names) %>% rename(p.lower = x)

  dat_coef <- left_join(dat_outcome_coef, p_lower, by = c("outcome","term"))
  itt3g_pm <- dat_coef[dat_coef$term == "TA2",] %>% dplyr::select(-term, -p.lower)
  itt3g_pm$df <- unlist(lapply(dat_out_m$fit, function(x) summary(x)$df[2]))

  r_ord <- c(7,2,5,8,3,6,9,1,4)

  out3 <- rbind(itt3g_mc[r_ord,], itt3g_pc[r_ord,], itt3g_pm[r_ord,])
  out3$outcome <- rep(c(outvars.wb.labs[-1], outvars.wh.labs[-1], outvars.bh.labs[-1]), 3)

itt.3g.ci <- as.numeric(out3$estimate) + abs(qt(p=0.025, df=out3$df, lower.tail=TRUE))*cbind(-as.numeric(out3$std.error), as.numeric(out3$std.error))
itt.3g.cir <- apply(itt.3g.ci, 1, function(x) paste("[", round(x[1],3) ,", ", round(x[2],3), "]", sep=""))

# Join with CI
itt.3g.tab <- cbind(as.matrix(out3[,-6]), itt.3g.cir)

# Round and format results
for(i in 2:5) itt.3g.tab[,i] <- round(as.numeric(itt.3g.tab[,i]), 3)
itt.3g.tab[,5] <- paste("(",itt.3g.tab[,5],")",sep="")

colnames(itt.3g.tab) <- c("Outcome", "Estimate", "SE", "t", "p-value", "95% CI")
write.csv(itt.3g.tab, file="out_tablea10_itt_blockfe_nocovs.csv", row.names=FALSE)

kable(itt.3g.tab[1:9,], caption="**Table A10, Panel I. Monitoring vs. Control**")
kable(itt.3g.tab[10:18,], caption="**Table A10, Panel II. Punitive vs. Control**")
kable(itt.3g.tab[19:27,], caption="**Table A10, Panel III. Punitive vs. Monitoring**")
```

```{r, echo=F, eval=T, warning=F}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# Format table and export to tex

start.rows <- seq(1,27,3)
stop.rows <- start.rows + 2
#start.rows; stop.rows

Ylabs <- c("Index measure of favorable in-person interactions",
           "Received post-visit callback",
           "Received post-visit offer")
Ylabs <- rep(Ylabs, 9)

seclabs <- c("I. Monitoring vs. Control", "II. Punitive vs. Control", "III. Punitive vs. Monitoring")
sublabs <- rep(c("A. White vs. Black", "B. White vs. Hispanic", "C. Black vs. Hispanic"), 3)


tab.out2 <- list()
seclabs.index <- 0
for(i in 1:length(start.rows)){
  if(i %in% c(1,4,7)){
    seclabs.index <- seclabs.index + 1
    tab.out2[[i]] <- rbind(c(seclabs[seclabs.index], rep(NA,5)),
                           c(sublabs[i], rep(NA,5)),
                           itt.2g.tab[start.rows[i]:stop.rows[i],])        
  } else {
    tab.out2[[i]] <- rbind(c(sublabs[i], rep(NA,5)), itt.3g.tab[start.rows[i]:stop.rows[i],])    
  }
}

tab.out2 <- do.call(rbind, tab.out2)

row.names(tab.out2) <- NULL
out <- print(xtable(tab.out2, align="llrrrrr", digits=3), include.rownames=FALSE)
out <- unlist(strsplit(out, "\n"))

out <- gsub("A. White vs. Black", "\\underline{A. White vs. Black}", out, fixed=TRUE)
out <- gsub("B. White vs. Hispanic", "\\underline{B. White vs. Hispanic}", out, fixed=TRUE)
out <- gsub("C. Black vs. Hispanic", "\\underline{C. Black vs. Hispanic}", out, fixed=TRUE)
out <- gsub("I. Monitoring vs. Control &  &  &  &  & ", "\\multicolumn{6}{c}{I. Monitoring vs. Control}", out, fixed=TRUE)
out <- gsub("II. Punitive vs. Control &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{II. Punitive vs. Control}", out, fixed=TRUE)
out <- gsub("III. Punitive vs. Monitoring &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{III. Punitive vs. Monitoring}", out, fixed=TRUE)

out <- gsub("(White vs. Black)", "", out, fixed=TRUE)
out <- gsub("(White vs. Hispanic)", "", out, fixed=TRUE)
out <- gsub("(Black vs. Hispanic)", "", out, fixed=TRUE)

out <- c(out[3:4],
         "\\resizebox{.85\\textwidth}{!}{",
         out[5:49],
         "}",
         "\\caption[Estimated Effects of Messaging on Net Discrimination Levels. Cells contain ITT estimates from OLS models with inverse probability weights and block fixed effects. For each reference group versus comparison group pairing, outcomes are net discrimination measures against the comparison group relative to the reference group. Es- timated effects that are positive (negative) are interpreted as increases (decreases) in net discrimination against the comparison group relative to the reference group. Estimated p-values are reported in parentheses; p-values correspond to a one-sided test of the null hypothesis of equality of means for the monitoring-control and punitive-control compar- isons, and to a two-sided test of the null hypothesis of equality of means for the punitive-monitoring comparison and for all analyses involving net discrimination against Hispanic (vs. black) testers.}",
         "\\label{ittnocov_3g}",
         out[50])

# write in .tex and .csv
cat(out, file="out_tablea10_itt_blockfe_nocovs.tex", include.rownames=FALSE)

```

## Table A11 (p. A-29): Predicted Means, Differences, and Percent Differences from Two-Group Parametric Estimators

```{r tablea11, echo=T, eval=T, warning=F}
ccdat <- data.frame(TA1=0, TA2=0, block=1:17)
mcdat <- data.frame(TA1=1, TA2=0, block=1:17)
pcdat <- data.frame(TA1=0, TA2=1, block=1:17)

cmdat <- data.frame(TA0=0, TA2=0, block=1:17)
pmdat <- data.frame(TA0=0, TA2=1, block=1:17)
cmdat.i2 <- data.frame(TA0=0, TA2=0, block=c(1:4,6:17))
pmdat.i2 <- data.frame(TA0=0, TA2=1, block=c(1:4,6:17))

fit.wb.mc <- fit.wb.pc <- fit.wb.pm <- fit.wh.mc <- fit.wh.pc <- fit.wh.pm <- fit.bh.mc <- fit.bh.pc <- fit.bh.pm <- list() # store results from ITT est w/ block FE (no other covs)

for(i in 1:length(outvars.wb)){  
  
  model.mc <- paste(outvars.wb[i]," ~ TA1 + as.factor(block)",sep="")
  model.pc <- paste(outvars.wb[i]," ~ TA2 + as.factor(block)",sep="")
  model.pm <- paste(outvars.wb[i]," ~ TA2 + as.factor(block)",sep="")
  
  fit.wb.mc[[i]] <- fit.mc <- lm(formula=model.mc, data=dat[dat$TA %in% c(0,1),], weights=ipw10)
  fit.wb.pc[[i]] <- fit.pc <- lm(formula=model.pc, data=dat[dat$TA %in% c(0,2),], weights=ipw20)
  fit.wb.pm[[i]] <- fit.pm <- lm(formula=model.pm, data=dat[dat$TA %in% c(1,2),], weights=ipw21)
}

for(i in 1:length(outvars.wh)){  
  
  model.mc <- paste(outvars.wh[i]," ~ TA1 + as.factor(block)",sep="")
  model.pc <- paste(outvars.wh[i]," ~ TA2 + as.factor(block)",sep="")
  model.pm <- paste(outvars.wh[i]," ~ TA2 + as.factor(block)",sep="")
  
  fit.wh.mc[[i]] <- fit.mc <- lm(formula=model.mc, data=dat[dat$TA %in% c(0,1),], weights=ipw10)
  fit.wh.pc[[i]] <- fit.pc <- lm(formula=model.pc, data=dat[dat$TA %in% c(0,2),], weights=ipw20)
  fit.wh.pm[[i]] <- fit.pm <- lm(formula=model.pm, data=dat[dat$TA %in% c(1,2),], weights=ipw21)
}

for(i in 1:length(outvars.bh)){  
  
  model.mc <- paste(outvars.bh[i]," ~ TA1 + as.factor(block)",sep="")
  model.pc <- paste(outvars.bh[i]," ~ TA2 + as.factor(block)",sep="")
  model.pm <- paste(outvars.bh[i]," ~ TA2 + as.factor(block)",sep="")
  
  fit.bh.mc[[i]] <- fit.mc <- lm(formula=model.mc, data=dat[dat$TA %in% c(0,1),], weights=ipw10)
  fit.bh.pc[[i]] <- fit.pc <- lm(formula=model.pc, data=dat[dat$TA %in% c(0,2),], weights=ipw20)
  fit.bh.pm[[i]] <- fit.pm <- lm(formula=model.pm, data=dat[dat$TA %in% c(1,2),], weights=ipw21)
}

pred.vals <- list()

for(i in 1:4){
  
  #--------- m-c ---------#
  # wb
  mc.wb <- c(mean(predict(fit.wb.mc[[i]], mcdat)), mean(predict(fit.wb.mc[[i]], ccdat)))
  # wh
  mc.wh <- c(mean(predict(fit.wh.mc[[i]], mcdat)), mean(predict(fit.wh.mc[[i]], ccdat)))
  # bh
  mc.bh <- c(mean(predict(fit.bh.mc[[i]], mcdat)), mean(predict(fit.bh.mc[[i]], ccdat)))
  
  #--------- p-c ---------#
  # wb
  pc.wb <- c(mean(predict(fit.wb.pc[[i]], pcdat)), mean(predict(fit.wb.pc[[i]], ccdat)))
  # wh
  pc.wh <- c(mean(predict(fit.wh.pc[[i]], pcdat)), mean(predict(fit.wh.pc[[i]], ccdat)))
  # bh
  pc.bh <- c(mean(predict(fit.bh.pc[[i]], pcdat)), mean(predict(fit.bh.pc[[i]], ccdat)))
  
  #--------- p-m ---------#
  
  if(i != 2){
    # wb
    pm.wb <- c(mean(predict(fit.wb.pm[[i]], pmdat)), mean(predict(fit.wb.pm[[i]], cmdat)))
    # w
    pm.wh <- c(mean(predict(fit.wh.pm[[i]], pmdat)), mean(predict(fit.wh.pm[[i]], cmdat)))
    # bh
    pm.bh <- c(mean(predict(fit.bh.pm[[i]], pmdat)), mean(predict(fit.bh.pm[[i]], cmdat)))
  } else {
    # wb
    pm.wb <- c(mean(predict(fit.wb.pm[[i]], pmdat.i2)), mean(predict(fit.wb.pm[[i]], cmdat.i2)))
    # w
    pm.wh <- c(mean(predict(fit.wh.pm[[i]], pmdat.i2)), mean(predict(fit.wh.pm[[i]], cmdat.i2)))
    # bh
    pm.bh <- c(mean(predict(fit.bh.pm[[i]], pmdat.i2)), mean(predict(fit.bh.pm[[i]], cmdat.i2)))
    
  }
  
  pred.vals[[i]] <- rbind(mc.wb, mc.wh, mc.bh,
                          pc.wb, pc.wh, pc.bh,
                          pm.wb, pm.wh, pm.bh)
  
}

names(pred.vals) <- c("meet","index","callback","offer")

pred.mat <- NULL

for(i in 1:nrow(pred.vals[[1]])){
  for(j in 2:length(pred.vals)){
    pred.mat <- rbind(pred.mat, pred.vals[[j]][i,])
  }
}

pred.out <- cbind(pred.mat, pred.mat[,1]-pred.mat[,2], (pred.mat[,1]-pred.mat[,2])/abs(pred.mat[,2])*100)
colnames(pred.out) <- c("Predicted Treatment Mean", "Predicted Control Mean", "Difference", "Percent Difference")
for(i in 1:ncol(pred.out)) pred.out[,i] <- round(pred.out[,i], digits=3)

pred.out <- cbind(Outcome=itt.2g.tab[,1], pred.out)

pred.out2 <- list()
seclabs.index <- 0

for(i in 1:length(start.rows)){
  if(i %in% c(1,4,7)){
    seclabs.index <- seclabs.index + 1
    pred.out2[[i]] <- rbind(c(seclabs[seclabs.index], rep(NA,4)),
                           c(sublabs[i], rep(NA,4)),
                           pred.out[start.rows[i]:stop.rows[i],])        
  } else {
    pred.out2[[i]] <- rbind(c(sublabs[i], rep(NA,4)), pred.out[start.rows[i]:stop.rows[i],])    
  }
}

pred.out2 <- do.call(rbind, pred.out2)

kable(pred.out2, caption="**Table A11**")
```

```{r, echo=F, eval=T, warning=F}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# Format table and export to tex

out <- print(xtable(pred.out2, align="llrrrr", digits=3), include.rownames=FALSE)
out <- unlist(strsplit(out, "\n"))

out <- gsub("A. White vs. Black", "\\underline{A. White vs. Black}", out, fixed=TRUE)
out <- gsub("B. White vs. Hispanic", "\\underline{B. White vs. Hispanic}", out, fixed=TRUE)
out <- gsub("C. Black vs. Hispanic", "\\underline{C. Black vs. Hispanic}", out, fixed=TRUE)
out <- gsub("I. Monitoring vs. Control &  &  &  &  & ", "\\multicolumn{6}{c}{I. Monitoring vs. Control}", out, fixed=TRUE)
out <- gsub("II. Punitive vs. Control &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{II. Punitive vs. Control}", out, fixed=TRUE)
out <- gsub("III. Punitive vs. Monitoring &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{III. Punitive vs. Monitoring}", out, fixed=TRUE)

out <- gsub("(White vs. Black)", "", out, fixed=TRUE)
out <- gsub("(White vs. Hispanic)", "", out, fixed=TRUE)
out <- gsub("(Black vs. Hispanic)", "", out, fixed=TRUE)

out <- c(out[3:4],
         "\\resizebox{.85\\textwidth}{!}{",
         out[5:49],
         "}",
         "\\caption{Predicted Means, Difference, and Percent Difference from Two-Group Parametric Estimators. Predicted values calculated using estimates from OLS models with inverse probability weights and block fixed effects.}",
         "\\label{predmeans_2g}",
         out[50])

# write in .tex and .csv
cat(out, file="out_tablea11_pred_means_2g.tex", sep="\n", print = F)
```


## Table A12 (p. A-31): Incidence of Early Stage Discrimination: Subjective Measures

```{r tablea12, echo=T, warning=F}

table_a12 <- es_discrim_full_table[-c(1,2,14,15,27,28),]

print(xtable(table_a12, caption=c("Incidence of Early Stage Discrimination: Subjective Measures",
                            "Incidence of Early Stage Discrimination: Subjective Measure")),
      file="out_tablea12_early_stage_discrim_subjective.tex",
      include.rownames=FALSE)

table_a12_audit <- es_discrim_full_table[-c(1,2,14,15,27,28),1:6]
table_a12_expsamp <- es_discrim_full_table[-c(1,2,14,15,27,28),c(1, 7:11)]
table_a12_control <- es_discrim_full_table[-c(1,2,14,15,27,28),c(1, 12:16)]

colnames(table_a12_audit)[2:6] <- colnames(table_a12_expsamp)[2:6] <- colnames(table_a12_control)[2:6] <- c("Majority Group Mean", "Minority Group Mean", "Difference (Maj-Min)", "p-value", "[N]")

kable(table_a12_audit, caption="**Table A12, Panel I. All Pursued Cases in Audit Sample**")
kable(table_a12_expsamp, caption="**Table A12, Panel II. All Pursued Cases in Experimental Sample**")
kable(table_a12_control, caption="**Table A12, Panel III. All Pursued Cases in Control Group**")
```

## Table A13 (p. A-32): Treatment Noncompliance Incidence

```{r tablea13, echo=T, warning=F}
nc.counts <- table(dat$TA, dat$TD, useNA="ifany")
nc.props <- table(dat$TA, dat$TD, useNA="ifany")/rowSums(table(dat$TA, dat$TD, useNA="ifany"))

table_a13 <- cbind(nc.counts[,1], nc.props[,1],
              nc.counts[,2], nc.props[,2],
              nc.counts[,3], nc.props[,3],
              rowSums(table(dat$TA, dat$TD, useNA="ifany")))

table_a13[,c(2,4,6)] <- round(table_a13[,c(2,4,6)], digits=2)
table_a13 <- cbind(c("Control","Monitoring","Punitive"), table_a13)
colnames(table_a13) <- c("Assigned Arm",
                        "Control (N)", "Control (Proportion)", 
                        "Monitoring (N)", "Monitoring (Proportion)",
                        "Punitive (N)", "Punitive (Proportion)", "Row Totals")

print(xtable(table_a13), file="out_tablea13_noncompliance_incidence.tex", include.rownames=FALSE)
rownames(table_a13) <- NULL
kable(table_a13, caption="**Table A13**")
```

## Table A14 (p. A-34): Estimated Complier Average Causal Effects of Messages on Net Discrimination Levels

```{r tablea14, echo=T, warning=F}

# DEFINE PAIRS OF TREATMENT-COMPARISON DIFFERENCES OF INTEREST
diffs <- rbind(c(1,0),
               c(2,0),
               c(2,1))
colnames(diffs) <- c("treatment","comparison")

# ---------------------------------------------------------------------- #
# Code treatment receipt two ways for P vs C comparison
# Alt Approach A: treat Z=punitive D=monitoring as fail-to-treat
# Alt Approach B: treat Z=punitive D=monitoring as effectively punitive messaging
dat$TD_a <- dat$TD_b <- dat$TD
dat$TD_a <- with(dat, ifelse(TA==2 & TD==1, 0, TD_a))
dat$TD_b <- with(dat, ifelse(TA==2 & TD==1, 2, TD_b))

# ---------------------------------------------------------------------- #
# CACE helper function
# inputs:
#  -- dat: data frame
#  -- Y: name of outcome variable
#  -- D: name of treatment receipt variable
#  -- trt_t: value of treatment arm (e.g., 2 for punitive)
#  -- trt_c: value of comparison arm (e.g., 0 for control)

cace.est <- function(dat, Y, D, trt_t, trt_c){
  sub <- dat[dat$TA %in% c(trt_t, trt_c), ]

  sub$Z <- ifelse(sub$TA==trt_t, 1, 0)
  sub$D <- ifelse(sub[[D]]==trt_t, 1, 0)

  sub$wtvar <- sub[[paste0("ipw",trt_t,trt_c)]]
  
  itt.model <- paste(Y, "~ Z + as.factor(block)")
  ittd.model <- "D ~ Z + as.factor(block)"
  iv.model <- paste(Y, "~ D + as.factor(block) | Z + as.factor(block)")
  
  itt.fit <- lm(formula=itt.model, data=sub, weights=wtvar)
  ittd.fit <- lm(formula=ittd.model, data=sub, weights=wtvar)
  iv.fit <- ivreg(formula=iv.model, data=sub, weights=wtvar)
  
  itt.est <- summary(itt.fit)$coef[2,1] # itt
  ittd.est <- summary(ittd.fit)$coef[2,1] # itt.d
  cace.est <- summary(iv.fit)$coef[2,1] # cace
  
  # p value: use one sided if M/C or P/C (j %in% 1:2), use two sided if P/M (j==3)
  if( trt_c == 0 ){
    cace.pval <- pt(coef(summary(iv.fit))[,3], summary(iv.fit)$df[2], lower=TRUE)[2]
  } else {
    cace.pval <- summary(iv.fit)$coef[2,4]
  }
  
  out <- c(Y, trt_t, trt_c, round(itt.est,3), round(ittd.est,3), round(cace.est,3), round(cace.pval,3))
  names(out) <- c("Y", "treatment", "comparison", "ITT", "ITT_D", "CACE", "p-value")
  return(out)
}

# M vs C - estimate CACE using IV
mc.cace.wb <- rbind(cace.est(dat=dat, Y=outvars.wb[2], D="TD", trt_t=1, trt_c=0),
                    cace.est(dat=dat, Y=outvars.wb[3], D="TD", trt_t=1, trt_c=0),
                    cace.est(dat=dat, Y=outvars.wb[4], D="TD", trt_t=1, trt_c=0))

mc.cace.wh <- rbind(cace.est(dat=dat, Y=outvars.wh[2], D="TD", trt_t=1, trt_c=0),
                    cace.est(dat=dat, Y=outvars.wh[3], D="TD", trt_t=1, trt_c=0),
                    cace.est(dat=dat, Y=outvars.wh[4], D="TD", trt_t=1, trt_c=0))

mc.cace.bh <- rbind(cace.est(dat=dat, Y=outvars.bh[2], D="TD", trt_t=1, trt_c=0),
                    cace.est(dat=dat, Y=outvars.bh[3], D="TD", trt_t=1, trt_c=0),
                    cace.est(dat=dat, Y=outvars.bh[4], D="TD", trt_t=1, trt_c=0))

# P vs C (v1) - treat Z=punitive D=monitoring as fail-to-treat
pc1.cace.wb <- rbind(cace.est(dat=dat, Y=outvars.wb[2], D="TD_a", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wb[3], D="TD_a", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wb[4], D="TD_a", trt_t=2, trt_c=0))

pc1.cace.wh <- rbind(cace.est(dat=dat, Y=outvars.wh[2], D="TD_a", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wh[3], D="TD_a", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wh[4], D="TD_a", trt_t=2, trt_c=0))

pc1.cace.bh <- rbind(cace.est(dat=dat, Y=outvars.bh[2], D="TD_a", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.bh[3], D="TD_a", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.bh[4], D="TD_a", trt_t=2, trt_c=0))

# P vs C (v2) - treat Z=punitive D=monitoring as effectively punitive messaging
pc2.cace.wb <- rbind(cace.est(dat=dat, Y=outvars.wb[2], D="TD_b", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wb[3], D="TD_b", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wb[4], D="TD_b", trt_t=2, trt_c=0))

pc2.cace.wh <- rbind(cace.est(dat=dat, Y=outvars.wh[2], D="TD_b", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wh[3], D="TD_b", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.wh[4], D="TD_b", trt_t=2, trt_c=0))

pc2.cace.bh <- rbind(cace.est(dat=dat, Y=outvars.bh[2], D="TD_b", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.bh[3], D="TD_b", trt_t=2, trt_c=0),
                     cace.est(dat=dat, Y=outvars.bh[4], D="TD_b", trt_t=2, trt_c=0))

# Stitch together results
stitch.cace <- function(wb, wh, bh, wb.labs, wh.labs, bh.labs){
  blanks <- rep(NA, 4)
  out <- rbind(blanks, wb[,4:7],
               blanks, wh[,4:7],
               blanks, bh[,4:7])
  out <- cbind(c("A. White vs. Black", wb.labs[2:4],
                 "B. White vs. Hispanic", wh.labs[2:4],
                 "C. Black vs. Hispanic", bh.labs[2:4]), out)
  out[,1] <- gsub("(White vs. Black)", "", out[,1], fixed=TRUE)
  out[,1] <- gsub("(White vs. Hispanic)", "", out[,1], fixed=TRUE)
  out[,1] <- gsub("(Black vs. Hispanic)", "", out[,1], fixed=TRUE)

  rownames(out) <- NULL
  colnames(out) <- c("Outcome", "ITT", "ITT_D", "CACE", "p-value")
  return(out)
}

cace.mc <- stitch.cace(wb=mc.cace.wb,
                       wh=mc.cace.wh,
                       bh=mc.cace.bh,
                       wb.labs=outvars.wb.labs,
                       wh.labs=outvars.wh.labs,
                       bh.labs=outvars.bh.labs)
cace.pc1 <- stitch.cace(wb=pc1.cace.wb,
                        wh=pc1.cace.wh,
                        bh=pc1.cace.bh,
                        wb.labs=outvars.wb.labs,
                        wh.labs=outvars.wh.labs,
                        bh.labs=outvars.bh.labs)
cace.pc2 <- stitch.cace(wb=pc2.cace.wb,
                        wh=pc2.cace.wh,
                        bh=pc2.cace.bh,
                        wb.labs=outvars.wb.labs,
                        wh.labs=outvars.wh.labs,
                        bh.labs=outvars.bh.labs)

# Assemble final CACE table (same info; two possible formats)
cace.table1 <- rbind(c("I. Monitoring vs. Control", rep(NA, 4)),
                     cace.mc,
                     c("II. Punitive vs. Control (Upper Bound)", rep(NA,4)),
                     cace.pc1,
                     c("III. Punitive vs. Control (Lower Bound)", rep(NA, 4)),
                     cace.pc2)
cace.pc <- list()
keep.rows <- c(1,5,9)
for(i in 1:nrow(cace.pc1)){
  if(i %in% keep.rows){
    cace.pc[[i]] <- cace.pc1[i,]
  } else {
    cace.pc[[i]] <- rbind(c(cace.pc1[i,1], rep(NA, 4)),
                          c("v1",cace.pc1[i,2:5]),
                          c("v2",cace.pc2[i,2:5]))
  }
}
cace.pc <- do.call(rbind, cace.pc)
cace.pc[,1] <- gsub("v1", "Upper bound", cace.pc[,1], fixed=TRUE)
cace.pc[,1] <- gsub("v2", "Lower bound", cace.pc[,1], fixed=TRUE)
cace.table2 <- rbind(c("I. Monitoring vs. Control", rep(NA, 4)),
                     cace.mc,
                     c("II. Punitive vs. Control (Upper and Lower Bounds)", rep(NA, 4)),
                     cace.pc)
kable(cace.table2, caption="**Table A14**")
```

```{r, echo=F, eval=T, warning=F}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output

# format and export tables
# we create two versions with different formats but with the same information
# out_tablea14_cace_table_format2.tex is the table used in the appendix

cace1 <- print(xtable(cace.table1, align="llrrrr"), include.rownames=FALSE)
cace1 <- unlist(strsplit(cace1, "\n"))

cace1.caption.short <- "Estimated Complier Average Causal Effects of Messages on Net Discrimination Levels for Monitoring-Control and Punitive-Control Comparisons"
cace1.caption <- "\\footnotesize Estimated Complier Average Causal Effects of Messages on Net Discrimination Levels for Monitoring-Control and Punitive-Control Comparisons. Cells contain estimates of the $ITT$, $ITT_D$ (proportion of Compliers), $CACE$, and $p$-values. $ITT$ and $ITT_D$ estimates are from OLS models with inverse probability weights and block fixed effects. $CACE$ estimates are from IV regression models with inverse probability weights and block fixed effects. Estimated $p$-values are reported in parentheses; $p$-values correspond to a one-sided test of the null hypothesis of equality of means for the monitoring-control and punitive-control comparisons, and to a two-sided test of the null hypothesis of equality of means for all analyses involving net discrimination against Hispanic (vs. black) testers. For the punitive-control comparison, the upper bound on the $CACE$ treats all subjects assigned to the punitive condition but who received the monitoring condition as subjects not complying with their treatment assignment. The lower bound on the CACE interprets the receipt of a monitoring message when assigned to punitive as effectively receiving a punitive message."

cace1 <- c(cace1[3:4],
           "\\resizebox{.85\\textwidth}{!}{",
           cace1[5:49],
           "}",
           paste0("\\caption[",cace1.caption.short,"]{",cace1.caption,"}"),
           "\\label{cace1}",
           cace1[50])

cace1 <- gsub("I. Monitoring vs. Control &  &  &  &",
              "\\multicolumn{5}{c}{I. Monitoring vs. Control}",
              cace1, fixed=TRUE)
cace1 <- gsub("II. Punitive vs. Control (Upper Bound) &  &  &  &",
              "\\multicolumn{5}{c}{II. Punitive vs. Control (Upper Bound)}",
              cace1, fixed=TRUE)
cace1 <- gsub("III. Punitive vs. Control (Lower Bound) &  &  &  &",
              "\\multicolumn{5}{c}{III. Punitive vs. Control (Lower Bound)}",
              cace1, fixed=TRUE)

cace1 <- gsub("A. White vs. Black", "\\underline{A. White vs. Black}", cace1, fixed=TRUE)
cace1 <- gsub("B. White vs. Hispanic", "\\underline{B. White vs. Hispanic}", cace1, fixed=TRUE)
cace1 <- gsub("C. Black vs. Hispanic", "\\underline{C. Black vs. Hispanic}", cace1, fixed=TRUE)
cat(cace1, file="out_tablea14_cace_table_format1.tex", sep="\n")


cace2 <- print(xtable(cace.table2, align="llrrrr"), include.rownames=FALSE)
cace2 <- unlist(strsplit(cace2, "\n"))

cace2.caption.short <- cace1.caption.short
cace2.caption <- cace1.caption

cace2 <- c(cace2[3:4],
           "\\resizebox{.8\\textwidth}{!}{",
           cace2[5],
           cace2[7:21],
           "\\hline\\hline",
           cace2[22:54],
           "}",
           paste0("\\caption[",cace2.caption.short,"]{",cace2.caption,"}"),
           "\\label{cace2}",
           cace2[55])

cace2 <- gsub("Upper bound &", "\\qquad Upper bound &", cace2, fixed=TRUE)
cace2 <- gsub("Lower bound &", "\\qquad Lower bound &", cace2, fixed=TRUE)
cace2 <- gsub("Outcome & ITT & ITT\\_D & CACE & p-value \\\\ ",
              "Outcome & $ITT$ & $ITT_D$ & $CACE$ & $p$-value \\\\ ",
              cace2, fixed=TRUE)

cace2 <- gsub("I. Monitoring vs. Control &  &  &  &", 
              "\\multicolumn{5}{c}{I. Monitoring vs. Control}",
              cace2, fixed=TRUE)
cace2 <- gsub("II. Punitive vs. Control (Upper and Lower Bounds) &  &  &  &", 
              "\\multicolumn{5}{c}{II. Punitive vs. Control (Upper and Lower Bounds)}",
              cace2, fixed=TRUE)

cace2 <- gsub("A. White vs. Black", "\\underline{A. White vs. Black}", cace2, fixed=TRUE)
cace2 <- gsub("B. White vs. Hispanic", "\\underline{B. White vs. Hispanic}", cace2, fixed=TRUE)
cace2 <- gsub("C. Black vs. Hispanic", "\\underline{C. Black vs. Hispanic}", cace2, fixed=TRUE)
cat(cace2, file="out_tablea14_cace_table_format2.tex", sep="\n")
```

## Table A15 (p. A-36): Covariate Adjusted ITT Estimates

### Lasso Regression for Covariate Selection

We first use the lasso as a principled method to select pre-treatment covariates that are prognostic of outcomes by arm (which are then used to estimate covariate adjusted ITT effects). The code below conducts this covariate selection procedure and produces the output files `misc_covselect_select0.csv`, `misc_covselect_select1.csv`, and `misc_covselect_select2.csv` (which are included in the replication archive). Note: This code chunk takes awhile to run.

```{r covselect, echo=T, eval=T, warning=F}
Ys <- c("index.wb", "ncb_wb", "noff_wb",
        "index.wh", "ncb_wh", "noff_wh",
        "index.bh", "ncb_bh", "noff_bh")

llrace <- c("primary_api", "primary_blk", "primary_hsp", "primary_wht")
llage <- c("primary_age_18to34", "primary_age_35to44", "primary_age_45to64", "primary_age_65over", "primary_age_unknown")
testerfe <- c("tid.A01", "tid.A10", "tid.A11", "tid.A13", "tid.A02", "tid.A21", "tid.A22", "tid.A03", "tid.A04", "tid.A05",
              "tid.A06", "tid.A07", "tid.A08", "tid.A09", "tid.B01", "tid.B11", "tid.B12", "tid.B14", "tid.B16", "tid.B02", 
              "tid.B20", "tid.B23", "tid.B24", "tid.B25", "tid.B27", "tid.B03", "tid.B04", "tid.B06", "tid.B07", "tid.B08", 
              "tid.B09", "tid.C01", "tid.C10", "tid.C12", "tid.C13", "tid.C14", "tid.C15", "tid.C02", "tid.C27", "tid.C29", 
              "tid.C03", "tid.C31", "tid.C33", "tid.C04", "tid.C05", "tid.C06", "tid.C07", "tid.C08", "tid.C09")
tafe <- testerfe[grepl(".A",testerfe,fixed=TRUE)]
tbfe <- testerfe[grepl(".B",testerfe,fixed=TRUE)]
tcfe <- testerfe[grepl(".C",testerfe,fixed=TRUE)]

Xs <- c("frame", "partnered", "rent + m.rent", "numbr", "sqft + m.sqft","regime1","regime2","regime3",
        "nnumattr_bh", "nnumattr_wh", "nnumattr_wb", "nnumskep_bh", "nnumskep_wh", "nnumskep_wb", 
        "npctskep_bh", "npctskep_wh", "npctskep_wb", "nnumpos_bh" , "nnumpos_wh" , "nnumpos_wb" , 
        "nnumneu_bh" , "nnumneu_wh" , "nnumneu_wb" , "nnumneg_bh" , "nnumneg_wh" , "nnumneg_wb" , 
        "npctpos_bh" , "npctpos_wh" , "npctpos_wb" , "npctneu_bh" , "npctneu_wh" , "npctneu_wb" , 
        "npctneg_bh" , "npctneg_wh" , "npctneg_wb" , "nanyskep_bh", "nanyskep_wh", "nanyskep_wb", 
        "nanyneg_bh" , "nanyneg_wh" , "nanyneg_wb" , "team_gender", "broker",
        "callorder.wb", "callorder.wh", "callorder.bh",
        "incrank.wb.gt", "incrank.wh.gt", "incrank.bh.gt",
        "incrank.wb.eq", "incrank.wh.eq", "incrank.bh.eq",
        "incrank.wb.lt", "incrank.wh.lt", "incrank.bh.lt",
        "boro.brx", "boro.brk", "boro.mnh", "boro.que", "boro.stn",
        "inc.w.hi","inc.b.hi","inc.h.hi","ll.female + m.ll.female",
        llrace, llage, testerfe)

# Define sets of matched pair vars to kick out, by outcome
wb.vars <- Xs[grepl(".wb",Xs,fixed=TRUE) | grepl("_wb",Xs,fixed=TRUE)]
wh.vars <- Xs[grepl(".wh",Xs,fixed=TRUE) | grepl("_wh",Xs,fixed=TRUE)]
bh.vars <- Xs[grepl(".bh",Xs,fixed=TRUE) | grepl("_bh",Xs,fixed=TRUE)]


# Change factor vars to numeric (needed for cross validation function)
dat$frame <- as.numeric(dat$frame) # 1=likely disc, 2=representative (recoded to 0)
dat$frame <- ifelse(dat$frame==2, 0, dat$frame)

dat$team_gender <- as.numeric(dat$team_gender) # 1=female, 2=male (recoded to 0)
dat$team_gender <- ifelse(dat$team_gender==2, 0, dat$team_gender)

# Subset data by treatment arm
sub0 <- dat[dat$TA==0,]
sub1 <- dat[dat$TA==1,]
sub2 <- dat[dat$TA==2,]

#------------------------------------------------#
# Variable selection using lasso
#------------------------------------------------#
set.seed(20150210)
nfolds <- 5
niter <- 1000  # number of times to shuffle folds

# ============== #
# Z=0 (control)
# ============== #

# initialize object to store results
# nested lists of 9 (one per outcome)
# one for model selected at lambda.min (cv0.min)
# another for model selected at lambda.1se (cv0.1se)
# one for a final set of covs we choose (cv0)
cv0.min <- cv0.1se <- list()
cv0 <- list()

# iterate over outcomes
  for(j in 1:length(Ys)){
  # cat("\n ******************* Outcome: ", Ys[j] ," ******************* \n", sep="")
  # pre-process data conditional on outcome of interest
  if(j %in% 1:3) {
    subX <- Xs[!(Xs %in% c(wh.vars, bh.vars))]  # if excluding hisp tester FE add: , tbfe
  } else if(j %in% 4:6) {
    subX <- Xs[!(Xs %in% c(wb.vars, bh.vars))]  # if excluding black tester FE add: , tafe
  } else if(j %in% 7:9) {
    subX <- Xs[!(Xs %in% c(wb.vars, wh.vars))]  # if excluding white tester FE add: , tcfe
  }
  temp <- sub0[,names(sub0) %in% c(Ys[j], subX, "ipw")]
  temp <- temp[apply(temp, 1, function(x) sum(is.na(x))) == 0,]  # kick out missing (required for cv.glmnet)
    
  # initialize object to store
  cv.out <- list()
  
  # initialize vector of fold assignments
  folds0 <- c(rep(1, round(nrow(temp)/nfolds, 0)),
              rep(2, round(nrow(temp)/nfolds, 0)),
              rep(3, round(nrow(temp)/nfolds, 0)),
              rep(4, round(nrow(temp)/nfolds, 0)),
              rep(5, nrow(temp) - 4*round(nrow(temp)/nfolds, 0) ))
    
  # iterate over shuffled fold assignments
  for(i in 1:niter){
    # cat(i, " ", sep="")
    # shuffle folds
    fold.assign <- folds0[sample(1:nrow(temp), nrow(temp), replace=FALSE)]
    # lasso
    cv.out[[i]] <- cv.glmnet(x=as.matrix(temp[,!(names(temp) %in% c(Ys[j], "ipw"))]),
                             y=as.vector(temp[[Ys[j]]]),
                             weights=temp$ipw,
                             type.measure="mse",
                             nfolds=5,
                             foldid=fold.assign,
                             alpha=1)
  }
  # Grab variables associated with model at lambda.1se and lambda.min
  out.1se <- lapply(cv.out, function(x) grabvars(x, s="lambda.1se"))
  out.min <- lapply(cv.out, function(x) grabvars(x, s="lambda.min"))
  vars.1se <- lapply(out.1se, function(x) as.character(x$vars[,1]))
  vars.min <- lapply(out.min, function(x) as.character(x$vars[,1]))
  
  # For each candidate model, fit Y ~ X for the treatment arm of interest
  fit.1se <- fit.min <- list()
  
  for(k in 1:length(vars.1se)){
    # one SE rule
    if ( length(vars.1se[[k]]) > 1 ) {
      X.selected <- vars.1se[[k]][vars.1se[[k]] != "(Intercept)"]
      X.selected <- paste(X.selected, collapse=" + ")
      model <- paste(Ys[j], "~", X.selected, sep=" ")
      fit <- lm(formula=model, data=sub0)
      adjusted.r2 <- summary(fit)$adj.r.squared
      f.stat <- summary(fit)$fstatistic[1]
      f.pvalue <- 1-pf(q=summary(fit)$fstatistic[1], df1=summary(fit)$fstatistic[2], df2=summary(fit)$fstatistic[3])
      fit.1se[[k]] <- c(X.selected, adjusted.r2, f.stat, f.pvalue)      
    } else {
      adjusted.r2 <- NA
      f.stat <- NA
      f.pvalue <- NA
      fit.1se[[k]] <- c(NA, adjusted.r2, f.stat, f.pvalue)
    }
    
    # min lambda
    if ( length(vars.min[[k]]) > 1 ) {
      X.selected <- vars.min[[k]][vars.min[[k]] != "(Intercept)"]
      X.selected <- paste(X.selected, collapse=" + ")
      model <- paste(Ys[j], "~", X.selected, sep=" ")
      fit <- lm(formula=model, data=sub0)
      adjusted.r2 <- summary(fit)$adj.r.squared
      f.stat <- summary(fit)$fstatistic[1]
      f.pvalue <- 1-pf(q=summary(fit)$fstatistic[1], df1=summary(fit)$fstatistic[2], df2=summary(fit)$fstatistic[3])
      fit.min[[k]] <- c(X.selected, adjusted.r2, f.stat, f.pvalue)      
    } else {
      adjusted.r2 <- NA
      f.stat <- NA
      f.pvalue <- NA
      fit.min[[k]] <- c(NA, adjusted.r2, f.stat, f.pvalue)      
    }
  }
  
  # collect all results
  fit.min <- do.call(rbind, fit.min)
  fit.1se <- do.call(rbind, fit.1se)
  fit.min <- as.data.frame(fit.min)
  fit.1se <- as.data.frame(fit.1se)
  names(fit.1se) <- names(fit.min) <- c("variables","adj.r2","f.stat","f.pvalue")
  fit.min[,1] <- as.character(fit.min[,1])
  fit.1se[,1] <- as.character(fit.1se[,1])
  for(v in 2:ncol(fit.min)) fit.min[,v] <- as.numeric(as.character(fit.min[,v]))
  for(v in 2:ncol(fit.1se)) fit.1se[,v] <- as.numeric(as.character(fit.1se[,v]))
  
  # choose variables that are (1) not missing, (2) with highest adj R2, and (3) with significant f stat p value
  choose.min <- fit.min[!is.na(fit.min$variables) & fit.min$adj.r2==max(fit.min$adj.r2, na.rm=TRUE) & fit.min$f.pvalue < 0.05,]
  choose.1se <- fit.1se[!is.na(fit.1se$variables) & fit.1se$adj.r2==max(fit.1se$adj.r2, na.rm=TRUE) & fit.1se$f.pvalue < 0.05,]

  # store result - lambda min
  if(nrow(choose.min)==0){
    cv0.min[[j]] <- rep(NA,4)
  } else {
    cv0.min[[j]] <- choose.min <- choose.min[!duplicated(choose.min),]
  }
  
  # store result - lambda 1se
  if(nrow(choose.1se)==0){
    cv0.1se[[j]] <- rep(NA,4)
  } else {
    cv0.1se[[j]] <- choose.1se <- choose.1se[!duplicated(choose.1se),]
  }
}

# ============== #
# Z=1 (monitoring)
# ============== # 

# initialize object to store results
# nested lists of 9 (one per outcome)
# one for model selected at lambda.min (cv1.min)
# another for model selected at lambda.1se (cv1.1se)
# one for a final set of covs we choose (cv1)
cv1.min <- cv1.1se <- list()
# iterate over outcomes
  for(j in 1:length(Ys)){
  # cat("\n ******************* Outcome: ", Ys[j] ," ******************* \n", sep="")
  # pre-process data conditional on outcome of interest
  if(j %in% 1:3) {
    subX <- Xs[!(Xs %in% c(wh.vars, bh.vars))]  # if excluding hisp tester FE add: , tbfe
  } else if(j %in% 4:6) {
    subX <- Xs[!(Xs %in% c(wb.vars, bh.vars))]  # if excluding black tester FE add: , tafe
  } else if(j %in% 7:9) {
    subX <- Xs[!(Xs %in% c(wb.vars, wh.vars))]  # if excluding white tester FE add: , tcfe
  }
  temp <- sub1[,names(sub1) %in% c(Ys[j], subX, "ipw")]
  temp <- temp[apply(temp, 1, function(x) sum(is.na(x))) == 0,]  # kick out missing (required for cv.glmnet)
  
  # initialize object to store
  cv.out <- list()
  # initialize vector of fold assignments
  folds0 <- c(rep(1, round(nrow(temp)/nfolds, 0)),
              rep(2, round(nrow(temp)/nfolds, 0)),
              rep(3, round(nrow(temp)/nfolds, 0)),
              rep(4, round(nrow(temp)/nfolds, 0)),
              rep(5, nrow(temp) - 4*round(nrow(temp)/nfolds, 0) ))
  # iterate over shuffled fold assignments
  for(i in 1:niter){
    # cat(i, " ", sep="")
    # shuffle folds
    fold.assign <- folds0[sample(1:nrow(temp), nrow(temp), replace=FALSE)]
    # lasso
    cv.out[[i]] <- cv.glmnet(x=as.matrix(temp[,!(names(temp) %in% c(Ys[j], "ipw"))]),
                             y=as.vector(temp[[Ys[j]]]),
                             weights=temp$ipw,
                             type.measure="mse",
                             nfolds=5,
                             foldid=fold.assign,
                             alpha=1)
  }
  # Grab variables associated with model at lambda.1se and lambda.min
  out.1se <- lapply(cv.out, function(x) grabvars(x, s="lambda.1se"))
  out.min <- lapply(cv.out, function(x) grabvars(x, s="lambda.min"))
  vars.1se <- lapply(out.1se, function(x) as.character(x$vars[,1]))
  vars.min <- lapply(out.min, function(x) as.character(x$vars[,1]))
  
  # For each candidate model, fit Y ~ X for the treatment arm of interest
  fit.1se <- fit.min <- list()
  for(k in 1:length(vars.1se)){
    # one SE rule
    if ( length(vars.1se[[k]]) > 1 ) {
      X.selected <- vars.1se[[k]][vars.1se[[k]] != "(Intercept)"]
      X.selected <- paste(X.selected, collapse=" + ")
      model <- paste(Ys[j], "~", X.selected, sep=" ")
      fit <- lm(formula=model, data=sub1)
      adjusted.r2 <- summary(fit)$adj.r.squared
      f.stat <- summary(fit)$fstatistic[1]
      f.pvalue <- 1-pf(q=summary(fit)$fstatistic[1], df1=summary(fit)$fstatistic[2], df2=summary(fit)$fstatistic[3])
      fit.1se[[k]] <- c(X.selected, adjusted.r2, f.stat, f.pvalue)      
    } else {
      adjusted.r2 <- NA
      f.stat <- NA
      f.pvalue <- NA
      fit.1se[[k]] <- c(NA, adjusted.r2, f.stat, f.pvalue)
    }
    # min lambda
    if ( length(vars.min[[k]]) > 1 ) {
      X.selected <- vars.min[[k]][vars.min[[k]] != "(Intercept)"]
      X.selected <- paste(X.selected, collapse=" + ")
      model <- paste(Ys[j], "~", X.selected, sep=" ")
      fit <- lm(formula=model, data=sub1)
      adjusted.r2 <- summary(fit)$adj.r.squared
      f.stat <- summary(fit)$fstatistic[1]
      f.pvalue <- 1-pf(q=summary(fit)$fstatistic[1], df1=summary(fit)$fstatistic[2], df2=summary(fit)$fstatistic[3])
      fit.min[[k]] <- c(X.selected, adjusted.r2, f.stat, f.pvalue)      
    } else {
      adjusted.r2 <- NA
      f.stat <- NA
      f.pvalue <- NA
      fit.min[[k]] <- c(NA, adjusted.r2, f.stat, f.pvalue)      
    }
  }
  
  # collect all results
  fit.min <- do.call(rbind, fit.min)
  fit.1se <- do.call(rbind, fit.1se)
  fit.min <- as.data.frame(fit.min)
  fit.1se <- as.data.frame(fit.1se)
  names(fit.1se) <- names(fit.min) <- c("variables","adj.r2","f.stat","f.pvalue")
  fit.min[,1] <- as.character(fit.min[,1])
  fit.1se[,1] <- as.character(fit.1se[,1])
  for(v in 2:ncol(fit.min)) fit.min[,v] <- as.numeric(as.character(fit.min[,v]))
  for(v in 2:ncol(fit.1se)) fit.1se[,v] <- as.numeric(as.character(fit.1se[,v]))
  
  # choose variables that are (1) not missing, (2) with highest adj R2, and (3) with significant f stat p value
  choose.min <- fit.min[!is.na(fit.min$variables) & fit.min$adj.r2==max(fit.min$adj.r2, na.rm=TRUE) & fit.min$f.pvalue < 0.05,]
  choose.1se <- fit.1se[!is.na(fit.1se$variables) & fit.1se$adj.r2==max(fit.1se$adj.r2, na.rm=TRUE) & fit.1se$f.pvalue < 0.05,]
  
  # store result - lambda min
  if(nrow(choose.min)==0){
    cv1.min[[j]] <- rep(NA,4)
  } else {
    cv1.min[[j]] <- choose.min <- choose.min[!duplicated(choose.min),]
  }
  
  # store result - lambda 1se
  if(nrow(choose.1se)==0){
    cv1.1se[[j]] <- rep(NA,4)
  } else {
    cv1.1se[[j]] <- choose.1se <- choose.1se[!duplicated(choose.1se),]
  }
  
  }

# ============== #
# Z=2 (punitive)
# ============== # 

# initialize object to store results
# nested lists of 9 (one per outcome)
# one for model selected at lambda.min (cv2.min)
# another for model selected at lambda.1se (cv2.1se)
# one for a final set of covs we choose (cv1)
cv2.min <- cv2.1se <- list()
# iterate over outcomes
  for(j in 1:length(Ys)){
  # cat("\n ******************* Outcome: ", Ys[j] ," ******************* \n", sep="")
  # pre-process data conditional on outcome of interest
  if(j %in% 1:3) {
    subX <- Xs[!(Xs %in% c(wh.vars, bh.vars))]  # if excluding hisp tester FE add: , tbfe
  } else if(j %in% 4:6) {
    subX <- Xs[!(Xs %in% c(wb.vars, bh.vars))]  # if excluding black tester FE add: , tafe
  } else if(j %in% 7:9) {
    subX <- Xs[!(Xs %in% c(wb.vars, wh.vars))]  # if excluding white tester FE add: , tcfe
  }
  temp <- sub2[,names(sub2) %in% c(Ys[j], subX, "ipw")]
  temp <- temp[apply(temp, 1, function(x) sum(is.na(x))) == 0,]  # kick out missing (required for cv.glmnet)
  # initialize object to store
  cv.out <- list()
  # initialize vector of fold assignments
  folds0 <- c(rep(1, round(nrow(temp)/nfolds, 0)),
              rep(2, round(nrow(temp)/nfolds, 0)),
              rep(3, round(nrow(temp)/nfolds, 0)),
              rep(4, round(nrow(temp)/nfolds, 0)),
              rep(5, nrow(temp) - 4*round(nrow(temp)/nfolds, 0) ))
  # iterate over shuffled fold assignments
  for(i in 1:niter){
    # cat(i, " ", sep="")
    # shuffle folds
    fold.assign <- folds0[sample(1:nrow(temp), nrow(temp), replace=FALSE)]
    # lasso
    cv.out[[i]] <- cv.glmnet(x=as.matrix(temp[,!(names(temp) %in% c(Ys[j], "ipw"))]),
                             y=as.vector(temp[[Ys[j]]]),
                             weights=temp$ipw,
                             type.measure="mse",
                             nfolds=5,
                             foldid=fold.assign,
                             alpha=1)
  }
  # Grab variables associated with model at lambda.1se and lambda.min
  out.1se <- lapply(cv.out, function(x) grabvars(x, s="lambda.1se"))
  out.min <- lapply(cv.out, function(x) grabvars(x, s="lambda.min"))
  vars.1se <- lapply(out.1se, function(x) as.character(x$vars[,1]))
  vars.min <- lapply(out.min, function(x) as.character(x$vars[,1]))
  # For each candidate model, fit Y ~ X for the treatment arm of interest
  fit.1se <- fit.min <- list()
  for(k in 1:length(vars.1se)){
    # one SE rule
    if ( length(vars.1se[[k]]) > 1 ) {
      X.selected <- vars.1se[[k]][vars.1se[[k]] != "(Intercept)"]
      X.selected <- paste(X.selected, collapse=" + ")
      model <- paste(Ys[j], "~", X.selected, sep=" ")
      fit <- lm(formula=model, data=sub2)
      adjusted.r2 <- summary(fit)$adj.r.squared
      f.stat <- summary(fit)$fstatistic[1]
      f.pvalue <- 1-pf(q=summary(fit)$fstatistic[1], df1=summary(fit)$fstatistic[2], df2=summary(fit)$fstatistic[3])
      fit.1se[[k]] <- c(X.selected, adjusted.r2, f.stat, f.pvalue)      
    } else {
      adjusted.r2 <- NA
      f.stat <- NA
      f.pvalue <- NA
      fit.1se[[k]] <- c(NA, adjusted.r2, f.stat, f.pvalue)
    }
    
    # min lambda
    if ( length(vars.min[[k]]) > 1 ) {
      X.selected <- vars.min[[k]][vars.min[[k]] != "(Intercept)"]
      X.selected <- paste(X.selected, collapse=" + ")
      model <- paste(Ys[j], "~", X.selected, sep=" ")
      fit <- lm(formula=model, data=sub2)
      adjusted.r2 <- summary(fit)$adj.r.squared
      f.stat <- summary(fit)$fstatistic[1]
      f.pvalue <- 1-pf(q=summary(fit)$fstatistic[1], df1=summary(fit)$fstatistic[2], df2=summary(fit)$fstatistic[3])
      fit.min[[k]] <- c(X.selected, adjusted.r2, f.stat, f.pvalue)      
    } else {
      adjusted.r2 <- NA
      f.stat <- NA
      f.pvalue <- NA
      fit.min[[k]] <- c(NA, adjusted.r2, f.stat, f.pvalue)      
    }
  }
  
  # collect all results
  fit.min <- do.call(rbind, fit.min)
  fit.1se <- do.call(rbind, fit.1se)
  fit.min <- as.data.frame(fit.min)
  fit.1se <- as.data.frame(fit.1se)
  names(fit.1se) <- names(fit.min) <- c("variables","adj.r2","f.stat","f.pvalue")
  fit.min[,1] <- as.character(fit.min[,1])
  fit.1se[,1] <- as.character(fit.1se[,1])
  for(v in 2:ncol(fit.min)) fit.min[,v] <- as.numeric(as.character(fit.min[,v]))
  for(v in 2:ncol(fit.1se)) fit.1se[,v] <- as.numeric(as.character(fit.1se[,v]))
  
  # choose variables that are (1) not missing, (2) with highest adj R2, and (3) with significant f stat p value
  choose.min <- fit.min[!is.na(fit.min$variables) & fit.min$adj.r2==max(fit.min$adj.r2, na.rm=TRUE) & fit.min$f.pvalue < 0.05,]
  choose.1se <- fit.1se[!is.na(fit.1se$variables) & fit.1se$adj.r2==max(fit.1se$adj.r2, na.rm=TRUE) & fit.1se$f.pvalue < 0.05,]
  
  # store result - lambda min
  if(nrow(choose.min)==0){
    cv2.min[[j]] <- rep(NA,4)
  } else {
    cv2.min[[j]] <- choose.min <- choose.min[!duplicated(choose.min),]
  }
  
  # store result - lambda 1se
  if(nrow(choose.1se)==0){
    cv2.1se[[j]] <- rep(NA,4)
  } else {
    cv2.1se[[j]] <- choose.1se <- choose.1se[!duplicated(choose.1se),]
  }
  
  }

# Label items in list
names(cv0.1se) <- names(cv1.1se) <- names(cv2.1se) <- Ys
names(cv0.min) <- names(cv1.min) <- names(cv2.min) <- Ys

# For each, check for more than one result, if so check if the predictors are the same
#  Deduplicate selected variables (if multiple results returned)
for(i in 1:length(cv0.min)){

  # cv0.min
  if( !is.null(nrow(cv0.min[[i]])) ) {
    if ( nrow(cv0.min[[i]]) > 1 ) {
      var.comp <- strsplit(cv0.min[[i]][,1], " + ", fixed=TRUE)
      var.comp <- lapply(var.comp, function(x) sort(x))
      # if there are the same # of vars then
      if ( apply(as.matrix(sapply(var.comp, length)), MARGIN=2, function(x) sum(length(unique(x)))==1) ) {
        # combine all
        var.comp <- do.call(cbind, var.comp)
        # if all vars are the same
        if ( mean(apply(var.comp, MARGIN=1, function(x) sum(length(unique(x)))==1))==1 ) {
          cv0.min[[i]] <- cv0.min[[i]][sample(1:nrow(cv0.min[[i]]), 1),]
        } 
      }
    }
  }
  
  # cv1.min
  if( !is.null(nrow(cv1.min[[i]])) ) {
    if ( nrow(cv1.min[[i]]) > 1 ) {
      var.comp <- strsplit(cv1.min[[i]][,1], " + ", fixed=TRUE)
      var.comp <- lapply(var.comp, function(x) sort(x))
      # if there are the same # of vars then
      if ( apply(as.matrix(sapply(var.comp, length)), MARGIN=2, function(x) sum(length(unique(x)))==1) ) {
        # combine all
        var.comp <- do.call(cbind, var.comp)
        # if all vars are the same
        if ( mean(apply(var.comp, MARGIN=1, function(x) sum(length(unique(x)))==1))==1 ) {
          cv1.min[[i]] <- cv1.min[[i]][sample(1:nrow(cv1.min[[i]]), 1),]
        } 
      }
    }
  }
  
  # cv2.min
  if( !is.null(nrow(cv2.min[[i]])) ) {
    if ( nrow(cv2.min[[i]]) > 1 ) {
      var.comp <- strsplit(cv2.min[[i]][,1], " + ", fixed=TRUE)
      var.comp <- lapply(var.comp, function(x) sort(x))
      # if there are the same # of vars then
      if ( apply(as.matrix(sapply(var.comp, length)), MARGIN=2, function(x) sum(length(unique(x)))==1) ) {
        # combine all
        var.comp <- do.call(cbind, var.comp)
        # if all vars are the same
        if ( mean(apply(var.comp, MARGIN=1, function(x) sum(length(unique(x)))==1))==1 ) {
          cv2.min[[i]] <- cv2.min[[i]][sample(1:nrow(cv2.min[[i]]), 1),]
        } 
      }
    }
  }
}

# Collapse into a matrix
select0 <- do.call(rbind, cv0.min)
select1 <- do.call(rbind, cv1.min)
select2 <- do.call(rbind, cv2.min)

# Save output - these are the selected covariates by arm
write.csv(select0, "misc_covselect_select0.csv", row.names=TRUE)
write.csv(select1, "misc_covselect_select1.csv", row.names=TRUE)
write.csv(select2, "misc_covselect_select2.csv", row.names=TRUE)
```

### Covariate Adjusted ITT Estimation

The following code estimates covariate adjusted ITT effects and bootstraps 95% confidence intervals. Note: This code chunk takes awhile to run. This code chunk also saves main results used to build Table A15, which is done in the next code chunk below.

```{r covadjitt, echo=T, eval=T, warning=F}
##=============================================================================##
## Covariate Adjustment
##=============================================================================##

# define outcomes 
Ys <- c("index.wb", "ncb_wb", "noff_wb",
        "index.wh", "ncb_wh", "noff_wh",
        "index.bh", "ncb_bh", "noff_bh")

# read in covariates from variable selection procedure

select0 <- read.csv("misc_covselect_select0.csv", header=TRUE, colClasses="character")
select1 <- read.csv("misc_covselect_select1.csv", header=TRUE, colClasses="character")
select2 <- read.csv("misc_covselect_select2.csv", header=TRUE, colClasses="character")

names(select0)[1] <- names(select1)[1] <- names(select2)[1] <- "Y"

# create block FE dummy variables
block.dums <- model.matrix(~ -1 + as.factor(block), data=dat)
colnames(block.dums) <- paste("block", 1:17, sep="")
#head(block.dums)
dat <- cbind(dat, block.dums)
#names(dat)

# subset data by treatment arm

sub0 <- dat[dat$TA==0,]
sub1 <- dat[dat$TA==1,]
sub2 <- dat[dat$TA==2,]

# create vector of block fixed effect variables
blockfe <- paste("block", 2:17, sep="")

# create probability of treatment (for 2 group estimator)
# if in regime 1, equal probability of treatment

dat$pt10 <- ifelse(dat$regime==1, .5, ifelse(dat$regime==2, 1/3, .5))
dat$pt20 <- ifelse(dat$regime==1, .5, ifelse(dat$regime==2, 1/3, .5))
dat$pt21 <- ifelse(dat$regime==1, .5, ifelse(dat$regime==2, .5, .5))

# create ipw from probability of treatment (for 2 group estimator)

dat$ipw10 <- ifelse(dat$TA == 1, 1/dat$pt10, ifelse(dat$TA==0, 1/(1-dat$pt10), NA))
dat$ipw20 <- ifelse(dat$TA == 2, 1/dat$pt20, ifelse(dat$TA==0, 1/(1-dat$pt20), NA))
dat$ipw21 <- ifelse(dat$TA == 2, 1/dat$pt21, ifelse(dat$TA==1, 1/(1-dat$pt21), NA))

# create vectors of covariates by treatment arm and outcome
c0 <- lapply(strsplit(select0$variables, " + ", fixed=TRUE), function(x) c(x[!is.na(x)], blockfe))
c1 <- lapply(strsplit(select1$variables, " + ", fixed=TRUE), function(x) c(x[!is.na(x)], blockfe))
c2 <- lapply(strsplit(select2$variables, " + ", fixed=TRUE), function(x) c(x[!is.na(x)], blockfe))

# covariate adjusted estimates with empirical sandwich variance estimator

esv2g.10 <- NULL
esv2g.20 <- NULL
esv2g.21 <- NULL

for(i in 1:length(Ys)){
  esv2g.10[[i]] <- esv2g(data=dat,zvar="TA",Yvar=Ys[i],covs0=c0[[i]],covs1=c1[[i]],z1=1,z0=0,ipwvar="ipw10",ptvar="pt10")
  esv2g.20[[i]] <- esv2g(data=dat,zvar="TA",Yvar=Ys[i],covs0=c0[[i]],covs1=c2[[i]],z1=2,z0=0,ipwvar="ipw20",ptvar="pt20")
  esv2g.21[[i]] <- esv2g(data=dat,zvar="TA",Yvar=Ys[i],covs0=c1[[i]],covs1=c2[[i]],z1=2,z0=1,ipwvar="ipw21",ptvar="pt21")  
}


# bootstrap

set.seed(20150229) # set seed locally for reproducible bootstrap estimates

bs2g.10 <- NULL
bs2g.20 <- NULL
bs2g.21 <- NULL

# Define loop functions to run in parallel
bsfun_1 <- function(i) {
  bs(data=dat, sims=5000, zvar="TA", Yvar=Ys[i], z0=0, z1=1, X0=c0[[i]], X1=c1[[i]], ipwvar="ipw10",ptvar="pt10")
}

bsfun_2 <- function(i) {
  bs(data=dat, sims=5000, zvar="TA", Yvar=Ys[i], z0=0, z1=2, X0=c0[[i]], X1=c2[[i]], ipwvar="ipw20",ptvar="pt20")
}

bsfun_3 <- function(i) {
  bs(data=dat, sims=5000, zvar="TA", Yvar=Ys[i], z0=1, z1=2, X0=c1[[i]], X1=c2[[i]], ipwvar="ipw21",ptvar="pt21")  
}

bs2g.10 <- parallel::mclapply(1:length(Ys), bsfun_1)
bs2g.20 <- parallel::mclapply(1:length(Ys), bsfun_2)
bs2g.21 <- parallel::mclapply(1:length(Ys), bsfun_3)

# assemble results into matrices

bs10.est <- sapply(bs2g.10, function(x) unlist(x[1,]))
bs10.var <- sapply(bs2g.10, function(x) unlist(x[2,]))
bs10.se <- sapply(bs2g.10, function(x) unlist(x[3,]))

bs20.est <- sapply(bs2g.20, function(x) unlist(x[1,]))
bs20.var <- sapply(bs2g.20, function(x) unlist(x[2,]))
bs20.se <- sapply(bs2g.20, function(x) unlist(x[3,]))

bs21.est <- sapply(bs2g.21, function(x) unlist(x[1,]))
bs21.var <- sapply(bs2g.21, function(x) unlist(x[2,]))
bs21.se <- sapply(bs2g.21, function(x) unlist(x[3,]))

es10 <- do.call(rbind, lapply(esv2g.10, function(x) c(x$est, x$se)))
es20 <- do.call(rbind, lapply(esv2g.20, function(x) c(x$est, x$se)))
es21 <- do.call(rbind, lapply(esv2g.21, function(x) c(x$est, x$se)))

# Save bootstrap estimates
save(bs10.est, bs10.var, bs10.se,
     bs20.est, bs20.var, bs20.se,
     bs21.est, bs21.var, bs21.se,
     file="misc_bootstrap_estimates.RData")

# Read in bootstrap estimates
load("misc_bootstrap_estimates.RData")

#============================================================================#
# calculate 95% confidence intervals -- bootstrap
# (1) first order normal approximation
# (2) basic bootstrap interval
# (3) bootstrap percentile interval
# (4) studentized bootstrap interval
# (5) adjusted bootstrap percentile (BCa) interval
#============================================================================#


#==========================#
# (1) normal approximation
#==========================#

# if the ITT estimate is approximately normal, then
# 95% CI is  ITT.hat - b +/- z(alpha)*sqrt(nu)
# where z(alpha) = critical value of z at alpha=0.05
# ITT.hat = estimate (non-bootstrapped, get this from the empirical sandwich estimation output)
# b = mean of bootstrap estimate - ITT.hat
# nu = (1/(R-1))* SUM (over draws) (estimated mean - mean of bootstrap estimates)^2
# R = number of bootstrap draws (sims=5000 for us)

R = 5000

# M-C
bs10.normalci <- list()
for(i in 1:length(Ys)){
  nu <- (1/(R-1)) *   sum((bs10.est[,i]-mean(bs10.est[,i]))^2)
  lci <- es10[i,1] - (mean(bs10.est[,i]) - es10[i,1]) - qnorm(p=0.975, mean=0, sd=1) * sqrt(nu)
  uci <- es10[i,1] - (mean(bs10.est[,i]) - es10[i,1]) + qnorm(p=0.975, mean=0, sd=1) * sqrt(nu)
  bs10.normalci[[i]] <- c(lci, uci)
}

# P-C
bs20.normalci <- list()
for(i in 1:length(Ys)){
  nu <- (1/(R-1)) *   sum((bs20.est[,i]-mean(bs20.est[,i]))^2)
  lci <- es20[i,1] - (mean(bs20.est[,i]) - es20[i,1]) - qnorm(p=0.975, mean=0, sd=1) * sqrt(nu)
  uci <- es20[i,1] - (mean(bs20.est[,i]) - es20[i,1]) + qnorm(p=0.975, mean=0, sd=1) * sqrt(nu)
  bs20.normalci[[i]] <- c(lci, uci)
}

# P-M
bs21.normalci <- list()
for(i in 1:length(Ys)){
  nu <- (1/(R-1)) *   sum((bs21.est[,i]-mean(bs21.est[,i]))^2)
  lci <- es21[i,1] - (mean(bs21.est[,i]) - es21[i,1]) - qnorm(p=0.975, mean=0, sd=1) * sqrt(nu)
  uci <- es21[i,1] - (mean(bs21.est[,i]) - es21[i,1]) + qnorm(p=0.975, mean=0, sd=1) * sqrt(nu)
  bs21.normalci[[i]] <- c(lci, uci)
}


bs10.normalci <- do.call(rbind, bs10.normalci)
bs20.normalci <- do.call(rbind, bs20.normalci)
bs21.normalci <- do.call(rbind, bs21.normalci)

# bs10.normalci; bs20.normalci; bs21.normalci

#==========================#
# (2) basic bootstrap interval
#==========================#

# 2*t0 - quantile(t, c(.975, .025))

bs10.basicci <- list()
bs20.basicci <- list()
bs21.basicci <- list()

for(i in 1:length(Ys)){
  bs10.basicci[[i]] <- 2*es10[i,1] - quantile(bs10.est[,i], c(.975, .025))
  bs20.basicci[[i]] <- 2*es20[i,1] - quantile(bs20.est[,i], c(.975, .025))
  bs21.basicci[[i]] <- 2*es21[i,1] - quantile(bs21.est[,i], c(.975, .025))
}

bs10.basicci <- do.call(rbind, bs10.basicci)
bs20.basicci <- do.call(rbind, bs20.basicci)
bs21.basicci <- do.call(rbind, bs21.basicci)
colnames(bs10.basicci) <- colnames(bs20.basicci) <- colnames(bs21.basicci) <- c("lower","upper")

# bs10.basicci; bs20.basicci; bs21.basicci

#==========================#
# (3) percentile interval
#==========================#

# use empirical quantities at the 2.5th and 97.5th percentiles

bs10.pctci <- t(apply(bs10.est, 2, function(x) quantile(x, c(.025, .975)))) 
bs20.pctci <- t(apply(bs20.est, 2, function(x) quantile(x, c(.025, .975))))
bs21.pctci <- t(apply(bs21.est, 2, function(x) quantile(x, c(.025, .975))))

# bs10.pctci; bs20.pctci; bs21.pctci



#==========================#
# (4) studentized boot ci
#==========================#

# generalization of student t statistic to bootstrap setting
# requires variance for bootstrap ITT etsimates computed from each bs sample

# first calculate test statistic for each bs sample
bs10.t <- bs20.t <- bs21.t <- matrix(NA, ncol=length(Ys), nrow=R)

for(i in 1:length(Ys)){
  bs10.t[,i] <- (bs10.est[,i] - es10[i,1])/bs10.se[,i]
  bs20.t[,i] <- (bs20.est[,i] - es20[i,1])/bs20.se[,i]
  bs21.t[,i] <- (bs21.est[,i] - es21[i,1])/bs21.se[,i]
}

# from the empirical distribution of the test statistics,
# we calculate order statistics/quantiles:
#   t*[(R+1)(1-alpha)]
#   t*[(R+1)(alpha)]

bs10.q <- apply(bs10.t, 2, function(x) quantile(x, c(.975, .025)))
bs20.q <- apply(bs20.t, 2, function(x) quantile(x, c(.975, .025)))
bs21.q <- apply(bs21.t, 2, function(x) quantile(x, c(.975, .025)))

# the studentized CI is given by
# lower: ITT.hat - SE.hat * t*[(R+1)(1-alpha)]
# upper: ITT.hat - SE.hat * t*[(R+1)(alpha)]

bs10.tci <- bs20.tci <- bs21.tci <- NULL
for(i in 1:length(Ys)){
  bs10.tci[[i]] <- es10[i,1] - sd(bs10.est[,i]) * bs10.q[,i]
  bs20.tci[[i]] <- es20[i,1] - sd(bs20.est[,i]) * bs20.q[,i]
  bs21.tci[[i]] <- es21[i,1] - sd(bs21.est[,i]) * bs21.q[,i]  
}

bs10.tci <- do.call(rbind, bs10.tci)
bs20.tci <- do.call(rbind, bs20.tci)
bs21.tci <- do.call(rbind, bs21.tci)
colnames(bs10.tci) <- colnames(bs20.tci) <- colnames(bs21.tci) <- c("lower", "upper")

# bs10.tci; bs20.tci; bs21.tci



#============================================================================#
# calculate 95% confidence intervals -- empirical sandwich variance estimator
#============================================================================#

# calculate df by model

n10 <- rep(NA, length(Ys))
n20 <- rep(NA, length(Ys))
n21 <- rep(NA, length(Ys))

for(i in 1:length(Ys)){
  n10[i] <- sum(apply(dat[dat$TA %in% c(0,1),names(dat) %in% c(Ys[i],c0[[i]],c1[[i]],"TA")] ,1,function(x) sum(is.na(x))==0))
  n20[i] <- sum(apply(dat[dat$TA %in% c(0,2),names(dat) %in% c(Ys[i],c0[[i]],c2[[i]],"TA")] ,1,function(x) sum(is.na(x))==0))
  n21[i] <- sum(apply(dat[dat$TA %in% c(1,2),names(dat) %in% c(Ys[i],c1[[i]],c2[[i]],"TA")] ,1,function(x) sum(is.na(x))==0))  
}

# calculate DF as the two-group sample size, minus the sum of the number of covariates across treatment arms,
# minus 3 (2 for the intercepts of arm-specific models; 1 for the treatment-control difference)
df10 <- n10 - (sapply(c0, length)+sapply(c1, length)) - 3
df20 <- n20 - (sapply(c0, length)+sapply(c2, length)) - 3
df21 <- n21 - (sapply(c1, length)+sapply(c2, length)) - 3

cv10 <- -qt(p=0.025, df=df10, lower.tail=TRUE)
cv20 <- -qt(p=0.025, df=df20, lower.tail=TRUE)
cv21 <- -qt(p=0.025, df=df21, lower.tail=TRUE)

# form confidence interval

es10.ci <- cbind(es10[,1]-abs(cv10)*es10[,2], es10[,1]+abs(cv10)*es10[,2])
es20.ci <- cbind(es20[,1]-abs(cv20)*es20[,2], es20[,1]+abs(cv20)*es20[,2])
es21.ci <- cbind(es21[,1]-abs(cv21)*es21[,2], es21[,1]+abs(cv21)*es21[,2])


#============================================================================#
# plot estimates and confidence intervals
#============================================================================#

histYlabs <- c("Interactions index\n(White-Black)", "Callback\n(White-Black)", "Offer\n(White-Black)",
               "Interactions index\n(White-Hispanic)", "Callback\n(White-Hispanic)", "Offer\n(White-Hispanic)",
               "Interactions index\n(Black-Hispanic)", "Callback\n(Black-Hispanic)", "Offer\n(Black-Hispanic)")

# cbind(Ys, histYlabs)

xrange <- max(abs(range(c(es10[,1], bs10.est))))
xrange <- c(-xrange, xrange)

pdf("misc_bootstrap_bs_10_est.pdf", height=10, width=10)
#par(mfrow=c(3,3))
layout(matrix(c(1:9,0,10,0), nrow=4, ncol=3, byrow=TRUE), widths=c(3,3,3), heights=c(3,3,3,3), respect=FALSE)
for(i in 1:length(Ys)){
  hist(bs10.est[,i], breaks=55, main=histYlabs[i], xlab="bootstrap ITT estimate \n (monitoring v. control)", xlim=xrange, cex.axis=0.8)
  abline(v=es10[i,1], col="red", lwd=2, lty=1)  
  abline(v=mean(bs10.est[,i]), col="black", lwd=2, lty=2)
  abline(v=c(mean(bs10.est[,i])-sd(bs10.est[,i]), mean(bs10.est[,i])+sd(bs10.est[,i])), col="grey80", lwd=1, lty=4)  
  abline(v=es10.ci[i,], col="grey80", lwd=2, lty=2)
  abline(v=bs10.basicci[i,], col=rgb(1,0,1,.5), lty=2)
  abline(v=bs10.pctci[i,], col=rgb(0,0,1,.3), lty=3)
  abline(v=bs10.normalci[i,], col=rgb(0,1,0,.5), lty=4)
  abline(v=bs10.tci[i,], col=rgb(1,0,0,.3), lty=2)  
}
plot(c(0,10), c(0,10), pch="", xaxt="n", yaxt="n", main="", xlab="", ylab="", bty="n")
legend(0,10,legend=c("ITT Estimate",
                     "Mean of Bootstrap ITT Estimates",
                     "+/- 1 sd, Bootstrap ITT Estimates",
                     "95% CI, Empirical Sandwich Var.",
                     "95% CI, Basic Bootstrap",                    
                     "95% CI, Percentile Bootstrap",
                     "95% CI, Normal Approx. Bootstrap",
                     "95% CI, Studentized Bootstrap"),
       col=c("red", "black", "grey80", "grey80", rgb(1,0,1,.5), rgb(0,0,1,.3), rgb(0,1,0,.5), rgb(1,0,0,.3)),
       lwd=rep(1.5, 8),
       lty=c(1,2,4,2,2,3,4,2),
       bty="n",
       seg.len=4)
dev.off()



xrange <- max(abs(range(c(es20[,1], bs20.est))))
xrange <- c(-xrange, xrange)

pdf("misc_bootstrap_bs_20_est.pdf", height=10, width=10)
#par(mfrow=c(3,3))
layout(matrix(c(1:9,0,10,0), nrow=4, ncol=3, byrow=TRUE), widths=c(3,3,3), heights=c(3,3,3,3), respect=FALSE)
for(i in 1:length(Ys)){
  hist(bs20.est[,i], breaks=55, main=histYlabs[i], xlab="bootstrap ITT estimate \n (punitive v. control)", xlim=xrange, cex.axis=0.8)
  abline(v=es20[i,1], col="red", lwd=2, lty=1)  
  abline(v=mean(bs20.est[,i]), col="black", lwd=2, lty=2)
  abline(v=c(mean(bs20.est[,i])-sd(bs20.est[,i]), mean(bs20.est[,i])+sd(bs20.est[,i])), col="grey80", lwd=1, lty=4)  
  abline(v=es20.ci[i,], col="grey80", lwd=2, lty=2)
  abline(v=bs20.basicci[i,], col=rgb(1,0,1,.5), lty=2)
  abline(v=bs20.pctci[i,], col=rgb(0,0,1,.3), lty=3)
  abline(v=bs20.normalci[i,], col=rgb(0,1,0,.5), lty=4)
  abline(v=bs20.tci[i,], col=rgb(1,0,0,.3), lty=2)  
}
plot(c(0,10), c(0,10), pch="", xaxt="n", yaxt="n", main="", xlab="", ylab="", bty="n")
legend(0,10,legend=c("ITT Estimate",
                     "Mean of Bootstrap ITT Estimates",
                     "+/- 1 sd, Bootstrap ITT Estimates",
                     "95% CI, Empirical Sandwich Var.",
                     "95% CI, Basic Bootstrap",                    
                     "95% CI, Percentile Bootstrap",
                     "95% CI, Normal Approx. Bootstrap",
                     "95% CI, Studentized Bootstrap"),
       col=c("red", "black", "grey80", "grey80", rgb(1,0,1,.5), rgb(0,0,1,.3), rgb(0,1,0,.5), rgb(1,0,0,.3)),
       lwd=rep(1.5, 8),
       lty=c(1,2,4,2,2,3,4,2),
       bty="n",
       seg.len=4)
dev.off()



xrange <- max(abs(range(c(es21[,1], bs21.est))))
xrange <- c(-xrange, xrange)

pdf("misc_bootstrap_bs_21_est.pdf", height=10, width=10)
#par(mfrow=c(3,3))
layout(matrix(c(1:9,0,10,0), nrow=4, ncol=3, byrow=TRUE), widths=c(3,3,3), heights=c(3,3,3,3), respect=FALSE)
for(i in 1:length(Ys)){
  hist(bs21.est[,i], breaks=55, main=histYlabs[i], xlab="bootstrap ITT estimate \n (punitive v. monitoring)", xlim=xrange, cex.axis=0.8)
  abline(v=es21[i,1], col="red", lwd=2, lty=1)  
  abline(v=mean(bs21.est[,i]), col="black", lwd=2, lty=2)
  abline(v=c(mean(bs21.est[,i])-sd(bs21.est[,i]), mean(bs21.est[,i])+sd(bs21.est[,i])), col="grey80", lwd=1, lty=4)  
  abline(v=es21.ci[i,], col="grey80", lwd=2, lty=2)
  abline(v=bs21.basicci[i,], col=rgb(1,0,1,.5), lty=2)
  abline(v=bs21.pctci[i,], col=rgb(0,0,1,.3), lty=3)
  abline(v=bs21.normalci[i,], col=rgb(0,1,0,.5), lty=4)
  abline(v=bs21.tci[i,], col=rgb(1,0,0,.3), lty=2)  
}
plot(c(0,10), c(0,10), pch="", xaxt="n", yaxt="n", main="", xlab="", ylab="", bty="n")
legend(0,10,legend=c("ITT Estimate",
                     "Mean of Bootstrap ITT Estimates",
                     "+/- 1 sd, Bootstrap ITT Estimates",
                     "95% CI, Empirical Sandwich Var.",
                     "95% CI, Basic Bootstrap",                    
                     "95% CI, Percentile Bootstrap",
                     "95% CI, Normal Approx. Bootstrap",
                     "95% CI, Studentized Bootstrap"),
       col=c("red", "black", "grey80", "grey80", rgb(1,0,1,.5), rgb(0,0,1,.3), rgb(0,1,0,.5), rgb(1,0,0,.3)),
       lwd=rep(1.5, 8),
       lty=c(1,2,4,2,2,3,4,2),
       bty="n",
       seg.len=4)
dev.off()



#============================================================================#
# assemble table
#============================================================================#

# Prep labels for output tables -- outcome labels, table section titles, table subsection titles

Ylabs <- c("Index measure of favorable in-person interactions",
           "Received post-visit callback",
           "Received post-visit offer")
Ylabs <- rep(Ylabs, 9)
# Ylabs

seclabs <- c("I. Monitoring vs. Control", "II. Punitive vs. Control", "III. Punitive vs. Monitoring")
sublabs <- rep(c("A. White vs. Black", "B. White vs. Hispanic", "C. Black vs. Hispanic"), 3)

# Covariate adjusted estimates - table shell:
#   Col 1 = Covariate adj ITT estimate
#   Col 2 = Empirical sandwich SE estimate
#   Col 3 = Empirical sandwich 95% CI
#   Col 4 = Mean of bootstrap ITT estimates
#   Col 5 = SD of bootstrap ITT estimates
#   Col 6 = 95% CI - Studentized t
#   Col 7 = 95% CI - Basic bootstrap
#   Col 8 = 95% CI - Percentile bootstrap
#   Col 9 = 95% CI - Normal Approx

tab10 <- cbind(round(es10, 3),
               apply(es10.ci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") ),
               round(apply(bs10.est, 2, mean),3),
               round(apply(bs10.est, 2, sd),3),
               apply(bs10.tci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") ),
               apply(bs10.basicci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") ),
               apply(bs10.pctci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") ),
               apply(bs10.normalci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") )
)
# tab10

tab20 <- cbind(round(es20, 3),
               apply(es20.ci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") ),
               round(apply(bs20.est, 2, mean),3),
               round(apply(bs20.est, 2, sd),3),
               apply(bs20.tci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") ),
               apply(bs20.basicci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") ),
               apply(bs20.pctci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") ),
               apply(bs20.normalci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") )
)
# tab20

tab21 <- cbind(round(es21, 3),
               apply(es21.ci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") ),
               round(apply(bs21.est, 2, mean),3),
               round(apply(bs21.est, 2, sd),3),
               apply(bs21.tci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") ),
               apply(bs21.basicci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") ),
               apply(bs21.pctci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") ),
               apply(bs21.normalci, 1, function(x) paste("[",round(x[1],3),",",round(x[2],3),"]",sep="") )
)
# tab21


# stack panels into single table
tab.out <- rbind(tab10, tab20, tab21)

# add parentheses to SE estimates
tab.out[,2] <- paste("(", tab.out[,2], ")", sep="")
tab.out[,5] <- paste("(", tab.out[,5], ")", sep="")

tab.out <- cbind(Ylabs, tab.out)

colnames(tab.out) <- c("Outcome Measure", "Estimate", "SE", "95% CI", "Mean Bootstrap Est.", "Bootstrap SE", "Studentized", "Basic", "Percentile", "Normal Approx.")

# tab.out

# Write an unformatted table
write.csv(tab.out, "out_tablea15_itt_blockfe_covadj_full_UNFORMATTED.csv", row.names=FALSE)

start.rows <- seq(1,27,3)
stop.rows <- start.rows + 2
# start.rows; stop.rows

tab.out2 <- list()
seclabs.index <- 0
for(i in 1:length(start.rows)){
  if(i %in% c(1,4,7)){
    seclabs.index <- seclabs.index + 1
    tab.out2[[i]] <- rbind(c(seclabs[seclabs.index], rep(NA,9)),
                           c(sublabs[i], rep(NA,9)),
                           tab.out[start.rows[i]:stop.rows[i],])        
  } else {
    tab.out2[[i]] <- rbind(c(sublabs[i], rep(NA,9)), tab.out[start.rows[i]:stop.rows[i],])    
  }
}


tab.out2 <- do.call(rbind, tab.out2)
# tab.out2

## Export results to csv

write.csv(tab.out2, "out_tablea15_itt_blockfe_covadj_full.csv", row.names=FALSE)
```

The following code builds Table A15 which includes the covariate adjusted ITT estimates (produced by the code above) and the unadjusted ITT estimates (from Table A7, for comparison)

```{r tablea15, echo=T, eval=T, warning=F}
### Unadjusted estimates (two-group estimator)

un <- read.csv("out_tablea7_itt_2g_blockfe_nocovs.csv",
               header=TRUE, stringsAsFactors = FALSE)
un <- un[,c(1,2,3,6)]

### Covariate adjusted estimates (two-group estimator)

ca <- read.csv("out_tablea15_itt_blockfe_covadj_full.csv",
               header=TRUE, stringsAsFactors=FALSE)

# get rid of normal approx CIs
ca <- ca[,-10]

### Combine unadjusted with covariate adjusted
x <- cbind(ca[,1], un[,2:4], ca[,2:9])

# clean up colnames
names(x) <- c("Outcome Measure", "Estimate", "SE", "95% CI", "Estimate", "SE", "95% CI", "Mean", "SE", "95% Studentized CI", "95% Basic CI", "95% Percentile CI")

# Show table in Rmd output
kable(x, col.names=c("Outcome Measure", "Unadj. (Est.)", "Unadj. (SE)", "Unadj. (95% CI)",
                     "Cov adj (Est.)", "Cov adj (SE)", "Cov adj (95% CI)",
                     "Cov adj with bootstrap (Mean)", "Cov adj with bootstrap (SE)",
                     "Cov adj with bootstrap (95% Studentized CI)",
                     "Cov adj with bootstrap (95% Basic CI)",
                     "Cov adj with bootstrap (95% Percentile CI)"),
      caption="**Table A15.**")
```

```{r, eval=T, echo=F, warning=F}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
### Format to export as tex
out <- print(xtable(x, align="llrrr|rrr|rrrrr", digits=3), include.rownames=FALSE)
out <- unlist(strsplit(out, "\n"))

out <- gsub("A. White vs. Black", "\\underline{A. White vs. Black}", out, fixed=TRUE)
out <- gsub("B. White vs. Hispanic", "\\underline{B. White vs. Hispanic}", out, fixed=TRUE)
out <- gsub("C. Black vs. Hispanic", "\\underline{C. Black vs. Hispanic}", out, fixed=TRUE)
out <- gsub("I. Monitoring vs. Control", "\\textbf{I. Monitoring vs. Control}", out, fixed=TRUE)
out <- gsub("II. Punitive vs. Control", "\\hline \\textbf{II. Punitive vs. Control}", out, fixed=TRUE)
out <- gsub("III. Punitive vs. Monitoring", "\\hline \\textbf{III. Punitive vs. Monitoring}", out, fixed=TRUE)

out <- c(out[3:4],
         "\\resizebox{1.4\\textwidth}{!}{",
         out[5:6],
         " & \\multicolumn{3}{c|}{Unadjusted} & \\multicolumn{3}{c|}{Covariate adjusted} & \\multicolumn{5}{c}{Covariate adjusted - bootstrap} \\\\ ",
         out[7:49],
         "}",
         "\\caption[Covariate adjusted ITT]{Covariate adjusted ITT estimates. The left panel presents the main estimates with block fixed effects and inverse probability weights. The middle panel presents covariate adjusted estimates with inverse probability weights; the uncertainty estimates are based on the empirical sandwich variance estimator by \\citet{YuanZhangDavidian:2012}. The right panel presents bootstrapped covariate adjusted estimates with 95\\% Studentized, basic, and percentile confidence intervals.}",
         "\\label{covadj}",
         out[50])

cat(out, file="out_tablea15_covadjitt.tex", sep="\n")
```


```{r eval=T, echo=F, warning=F}
# Recode frame and team_gender variables back to string (for subsequent analyses)
dat$frame <- ifelse(dat$frame == 0, "representative", "likely disc")
dat$team_gender <- ifelse(dat$team_gender == 0, "male", "female") # previously 0=male, 1=female
```

## Table A16 (p. A-37): Missingness Analysis: Estimated Correlation between Treatment Assignment and Missingness on Subject Index Measure

```{r tablea16, echo=T, eval=T, warning=F}
# Missingness analysis - Part 1: Does treatment predict missingness on index net discrim measure?
# Regression analysis with block FE and IPW

miss.index <- list(wb.10 = lm(is.na(index.wb) ~ TA1 + as.factor(block), data=dat[dat$TA %in% c(0,1),], weights=ipw10),
                   wb.20 = lm(is.na(index.wb) ~ TA2 + as.factor(block), data=dat[dat$TA %in% c(0,2),], weights=ipw20),
                   wb.21 = lm(is.na(index.wb) ~ TA2 + as.factor(block), data=dat[dat$TA %in% c(1,2),], weights=ipw21),
                   wh.10 = lm(is.na(index.wh) ~ TA1 + as.factor(block), data=dat[dat$TA %in% c(0,1),], weights=ipw10),
                   wh.20 = lm(is.na(index.wh) ~ TA2 + as.factor(block), data=dat[dat$TA %in% c(0,2),], weights=ipw20),
                   wh.21 = lm(is.na(index.wh) ~ TA2 + as.factor(block), data=dat[dat$TA %in% c(1,2),], weights=ipw21),
                   bh.10 = lm(is.na(index.bh) ~ TA1 + as.factor(block), data=dat[dat$TA %in% c(0,1),], weights=ipw10),
                   bh.20 = lm(is.na(index.bh) ~ TA2 + as.factor(block), data=dat[dat$TA %in% c(0,2),], weights=ipw20),
                   bh.21 = lm(is.na(index.bh) ~ TA2 + as.factor(block), data=dat[dat$TA %in% c(1,2),], weights=ipw21))

miss.index.out <- cbind(do.call(rbind, lapply(miss.index, function(x) summary(x)$coefficient[2,])), # estimates
                        unlist(lapply(miss.index, function(x) summary(x)$fstatistic[1])), # f test, f statistic
                        unlist(lapply(miss.index, lmp))) # f-test p value

miss.index.out <- apply(miss.index.out, 2, function(x) round(x, 3))

miss.index.out <- cbind(rep(c("Monitoring vs. Control", "Punitive vs. Control", "Punitive vs. Monitoring"), 3),
                        miss.index.out)
colnames(miss.index.out) <- c("Comparison", "Estimate", "SE", "t", "p-value", "F-statistic", "F-test p-value")
rownames(miss.index.out) <- NULL

out.miss <- rbind(c("A. White vs. Black", rep(NA, 6)),
                  miss.index.out[1:3,],
                  c("B. White vs. Hispanic", rep(NA, 6)),
                  miss.index.out[4:6,],
                  c("C. Black vs. Hispanic", rep(NA, 6)),
                  miss.index.out[7:9,])

kable(out.miss, caption="**Table A16**")
```

```{r, eval=T, echo=F, warning=F}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# format and export tex 
out.miss <- print(xtable(out.miss, align="llrrrrrr"), include.rownames=FALSE)
out.miss <- unlist(strsplit(out.miss, "\n"))

out.miss <- c(out.miss[3:4],
         "\\resizebox{.9\\textwidth}{!}{",
         out.miss[5:22],
         "}",
         "\\caption[Predicting missingness on subjective index as a function of treatment assignment]{The estimated correlation between treatment assignment and missingness on the subjective index measure of net discrimination in interactions during appointments, estimated from OLS models regressing missingness on treatment assignment and block fixed effects with inverse probability weighting. The F-statistic and F-test p-value tests the null hypothesis that all coefficients equal zero.}",
         "\\label{predmissindex}",
         out.miss[23])

cat(out.miss, file="out_tablea16_pred_missing_net_index.tex", sep="\n")
```

## Table A17 (p. A-37): Missingness Analysis: Pairwise Correlations between Missing Subjective Index Measures and Objective Net Discrimination Measures

```{r, eval=T, echo=T, warning=F}
# Part 2: Is missingness correlated with offers or callbacks?

miss.cor <- cor(cbind(dat[,c(outvars.wb[3:4], outvars.wh[3:4], outvars.bh[3:4])], is.na(dat$index.wb), is.na(dat$index.wh), is.na(dat$index.bh)))

miss.cor <- miss.cor[7:9,1:6]

miss.cor <- apply(miss.cor, 2, function(x) round(x,2))
rownames(miss.cor) <- paste("Missing Index Measure", c("W-B", "W-H", "B-H"), sep=", ")
miss.cor <- rbind(rep(c("Callbacks", "Offers"), 3), miss.cor)
colnames(miss.cor) <- c("W-B", "W-B", "W-H", "W-H", "B-H", "B-H") # label for tex output

# output for Rmd file only (omit first row of tex table and relabel column names)
kable(miss.cor[-1,], col.names = c("W-B Callbacks", "W-B Offers", "W-H Callbacks", "W-H Offers", "B-H Callbacks", "B-H Offers"), caption="**Table A17**")
```

```{r, eval=T, echo=F, warning=F}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# format and export tex 
miss.cor <- print(xtable(miss.cor, align="l|cc|cc|cc"))
miss.cor <- unlist(strsplit(miss.cor, "\n"))

miss.cor <- c(miss.cor[3:4],
              "\\resizebox{1\\textwidth}{!}{",
              miss.cor[5:6],
              " \\cline{2-7}",
              " & \\multicolumn{2}{c|}{White vs. Black} &   \\multicolumn{2}{c|}{White vs. Hispanic} &  \\multicolumn{2}{c}{Black vs. Hispanic}  \\\\",
              miss.cor[9],
              "\\hline",
              miss.cor[10:14],
              "}",
              "\\caption[Pairwise correlations between missing subjective net discrimination index measures and objective net discrimination measures]{Pairwise correlations between missing subjective net discrimination index measures and objective net discrimination measures.}",
              "\\label{pwmisscorr}",
              miss.cor[15])


cat(miss.cor, file="out_tablea17_pwmisscorr.tex", sep="\n")
```

## Table A18 (p. A-37): Missingness Analysis: Predicting Missingness on Subjective Indicators As a Function of Tester Race

```{r tablea18, echo=T, eval=T, warning=F}
# Part 3: Missingness as a function of tester race/ethnicity

miss.Ys <- c("sales", "qualpraise", "posbg", "posedit", "prof")
coefs <- fstat <- fp <- list()
for(i in 1:length(miss.Ys)){
  fmla <- paste0("is.na(",miss.Ys[i],") ~ ttype")
  fit <- lm(formula=fmla, data=ctx)
  coefs[[i]] <- apply(summary(fit)$coefficients, 2, function(x) round(x,3))
  fstat[[i]] <- summary(fit)$fstatistic[1]
  fp[[i]] <- lmp(fit)
}

# for subjective measures used to make index - results same across items (keep element #3)
miss3 <- rbind(cbind(rownames(coefs[[1]]), coefs[[1]]),
               c("F-statistic:", round(fstat[[i]], 3), rep(NA, 3)),
               c("F test p-value:", round(fp[[1]], 3), rep(NA, 3)))
miss3[,1] <- gsub("ttypeB", "Hispanic tester", miss3[,1], fixed=TRUE)
miss3[,1] <- gsub("ttypeC", "White tester", miss3[,1], fixed=TRUE)
rownames(miss3) <- NULL
colnames(miss3) <- c("Variable", "Estimate", "SE", "t", "p-value")

kable(miss3, caption="**Table A18**")
```

```{r, eval=T, echo=F, warning=F}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# format and export tex 
miss3 <- print(xtable(miss3, align="llcccc"), include.rownames=FALSE)
miss3 <- unlist(strsplit(miss3, "\n"))

miss3 <- c(miss3[3:4],
              miss3[5:11],
              "\\hline",
              miss3[12:15],
              "\\caption[Predicting missingness on subjective indicators of favorable treatment during appointments as a function of tester race]{The table presents OLS estimates from a regression predicting missingness on subjective indicators of favorable treatment during appointments as a function of tester race. The reference racial group is black. The F-test results reported at the bottom of the table are for a test of the null hypothesis that all coefficients equal zero.}",
              "\\label{predmisstesterrace}",
              miss3[16])

cat(miss3, file="out_tablea18_pred_missing_tester_race.tex", sep="\n")
```

## Table A19 (p. A-38): Balance Table for Categorical Covariates

```{r balanceprep, echo=F, eval=T, warning=F}
## CODE TO PREP DATA TO PRODUCE BALANCE TABLES

#-----------------------------------------------------------------------------#
# Split covariates into numeric vs factor variables
dat$o_rent <- ifelse(dat$rent == -9, NA, dat$rent)
dat$o_sqft <- ifelse(dat$sqft == -9, NA, dat$sqft)
Xs.num <- c("numbr", "o_rent", "o_sqft", "regime1", "regime2", "regime3", 
            "nnumattr_bh", "nnumattr_wh", "nnumattr_wb", "nnumskep_bh", "nnumskep_wh", "nnumskep_wb", 
            "npctskep_bh", "npctskep_wh", "npctskep_wb", "nnumpos_bh" , "nnumpos_wh" , "nnumpos_wb" , 
            "nnumneu_bh" , "nnumneu_wh" , "nnumneu_wb" , "nnumneg_bh" , "nnumneg_wh" , "nnumneg_wb" , 
            "npctpos_bh" , "npctpos_wh" , "npctpos_wb" , "npctneu_bh" , "npctneu_wh" , "npctneu_wb" , 
            "npctneg_bh" , "npctneg_wh" , "npctneg_wb" , "nanyskep_bh", "nanyskep_wh", "nanyskep_wb", 
            "nanyneg_bh" , "nanyneg_wh" , "nanyneg_wb" ,
            "broker", "callorder.wb", "callorder.wh", "callorder.bh", 
            "incrank.wb.gt", "incrank.wh.gt", "incrank.bh.gt",
            "incrank.wb.eq", "incrank.wh.eq", "incrank.bh.eq",
            "incrank.wb.lt", "incrank.wh.lt", "incrank.bh.lt",
            "boro.brx", "boro.brk", "boro.mnh", "boro.que", "boro.stn", 
            "inc.w.hi", "inc.b.hi", "inc.h.hi",
            llrace, llage, testerfe )
Xs.cat <- Xs[!(Xs %in% Xs.num)]
Xs.cat <- Xs.cat[!grepl("+", Xs.cat, fixed=TRUE)]
```

```{r tablea19, echo=T, eval=T, warning=F}
# CREATE BALANCE TABLES (Both categorical and continuous variables)
# unweighted, all arms
bt_unwtd = balanceTables(d=dat, z="TA", xcon=Xs.num, xcat=Xs.cat)
# weighted, all arms
bt_wtd = balanceTables(d=dat, z="TA", xcon=Xs.num, xcat=Xs.cat, weight="ipw")

# BALANCE TABLES: CATEGORICAL VARIABLES ONLY
bt_xcat = cbind(bt_unwtd$bt_cat[,1:4],
                bt_wtd$bt_cat[,2:4],
                bt_unwtd$bt_cat[,5:7],
                bt_wtd$bt_cat[,5:7],
                bt_unwtd$bt_cat[,8:10],
                bt_wtd$bt_cat[,8:10])
bt_xcat[,1] = c("Frame", "Likely discrimination", "Representative", "Household size", "1", "2", "Tester team gender", "Female", "Male")

kable(bt_xcat[,c(1:7)], 
      caption="**Table A19, Left Panel: Control Group**",
      col.names=c("Variable", "Unweighted Proportion", "Unweighted SE", "Unweighted N",
                  "Weighted Proportion", "Weighted SE", "Weighted N"))

kable(bt_xcat[,c(1, 8:13)], 
      caption="**Table A19, Middle Panel: Monitoring Group**",
      col.names=c("Variable", "Unweighted Proportion", "Unweighted SE", "Unweighted N",
                  "Weighted Proportion", "Weighted SE", "Weighted N"))

kable(bt_xcat[,c(1, 14:19)], 
      caption="**Table A19, Right Panel: Punitive Group**",
      col.names=c("Variable", "Unweighted Proportion", "Unweighted SE", "Unweighted N",
                  "Weighted Proportion", "Weighted SE", "Weighted N"))
```

```{r, eval=T, echo=F, warning=F}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output

# format and output to tex
bt_xcat = print(xtable(bt_xcat, align="llccc|ccc|ccc|ccc|ccc|ccc"), include.rownames=FALSE)
bt_xcat = unlist(strsplit(bt_xcat, "\n"))

bt_xcat = c(bt_xcat[3:4],
            "\\resizebox{1.3\\textwidth}{!}{",
            bt_xcat[5:6],
            "& \\multicolumn{6}{c}{Control} & \\multicolumn{6}{c}{Monitoring} & \\multicolumn{6}{c}{Punitive} \\\\ ",
            "& \\multicolumn{3}{c}{Unweighted} & \\multicolumn{3}{c}{Weighted} & \\multicolumn{3}{c}{Unweighted} & \\multicolumn{3}{c}{Weighted} & \\multicolumn{3}{c}{Unweighted} & \\multicolumn{3}{c}{Weighted} \\\\",
            "& Pct & SE & N & Pct & SE & N & Pct & SE & N & Pct & SE & N & Pct & SE & N & Pct & SE & N \\\\ ",
            bt_xcat[8:19],
            "}",
            "\\caption[Balance Table, Categorical Covariates]{Balance Table for Categorical Covariates. Cells contain proportions, standard errors, and counts.}",
            "\\label{balance_cat}",
            bt_xcat[20])


bt_xcat = gsub("Frame &", "\\textbf{Frame} &", bt_xcat, fixed=TRUE)
bt_xcat = gsub("Household size &", "\\textbf{Household size} &", bt_xcat, fixed=TRUE)
bt_xcat = gsub("Tester team gender &", "\\textbf{Tester team gender} &", bt_xcat, fixed=TRUE)

cat(bt_xcat, file="out_tablea19_balance_table_categoricalvars.tex", sep="\n")
```

## Table A20 (p. A-38 to A-44): Balance Table for Continuous Covariates

```{r tablea20, eval=T, echo=T, warning=F}
bt_xcon = cbind(bt_unwtd$bt_con[,1:2],
                bt_wtd$bt_con[,2],
                bt_unwtd$bt_con[,3],
                bt_wtd$bt_con[,3],
                bt_unwtd$bt_con[,4],
                bt_wtd$bt_con[,4])

# reorder table output by covariate group
xcon_groups = list(Apartment.Characteristics = c(1:3, 53:57),
                   Randomization.Regime = 4:6,
                   Early.Stage.Discrimination = 7:39,
                   Subject.Characteristics = c(40, 61:69),
                   Tester.Call.Order = 41:43,
                   Testers.Assumed.Income = c(44:52, 58:60),
                   Tester.Fixed.Effects = 70:118)

con_tab = list()
for(i in 1:length(xcon_groups)){
  vals = xcon_groups[[i]]
  temp = list()
  for(j in 1:length(vals)){
    temp[[j]] = bt_xcon[c(3*vals[j]-2, 3*vals[j]-1, 3*vals[j]),]
  }
  temp = do.call(rbind, temp)
  temp = rbind(c(gsub(".", " ", names(xcon_groups)[i], fixed=TRUE), rep(NA, ncol(temp)-1)), temp)
  names(temp)[3:7] = c("Z=0 wt", "Z=1", "Z=1 wt", "Z=2", "Z=2 wt")
  con_tab[[i]] = temp
}
con_tab = do.call(rbind, con_tab)
rownames(con_tab) = NULL
colnames(con_tab) <-  c("Variable", "Control-Unweighted", "Control-Weighted", "Monitoring-Unweighted","Monitoring-Weighted", "Punitive-Unweighted", "Punitive-Weighted")

con_tab_labs = Xs.num
con_tab_labs = gsub("tid.", "Tester ID ", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("numbr", "Advertised number of bedrooms", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("o_rent", "Advertised monthly rental price", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("o_sqft", "Advertised square footage", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("regime", "Regime ", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("boro.brx", "Borough: Bronx", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("boro.brk", "Borough: Brooklyn", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("boro.mnh", "Borough: Manhattan", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("boro.que", "Borough: Queens", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("boro.stn", "Borough: Staten Island", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.wb.gt", "Assumed Tester Incomes: White > Black", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.wh.gt", "Assumed Tester Incomes: White > Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.bh.gt", "Assumed Tester Incomes: Black > Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.wb.eq", "Assumed Tester Incomes: White = Black", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.wh.eq", "Assumed Tester Incomes: White = Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.bh.eq", "Assumed Tester Incomes: Black = Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.wb.lt", "Assumed Tester Incomes: White < Black", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.wh.lt", "Assumed Tester Incomes: White < Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("incrank.bh.lt", "Assumed Tester Incomes: Black < Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("inc.w.hi", "Assumed Tester Incomes: White Highest", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("inc.b.hi", "Assumed Tester Incomes: Black Highest", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("inc.h.hi", "Assumed Tester Incomes: Hispanic Highest", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_api", "Modal Perception of Landlord Race among Testers: Asian", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_blk", "Modal Perception of Landlord Race among Testers: Black", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_hsp", "Modal Perception of Landlord Race among Testers: Hispanic/Latino", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_wht", "Modal Perception of Landlord Race among Testers: White", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_age_18to34", "Modal Perception of Landlord Age among Testers: 18 to 34", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_age_35to44", "Modal Perception of Landlord Age among Testers: 35 to 44", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_age_45to64", "Modal Perception of Landlord Age among Testers: 45 to 64", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_age_65over", "Modal Perception of Landlord Age among Testers: 65 and up", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("primary_age_unknown", "Modal Perception of Landlord Age among Testers: Unknown/No Consensus", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("callorder.wb", "Randomized Tester Call Order: White before Black", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("callorder.wh", "Randomized Tester Call Order: White before Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("callorder.bh", "Randomized Tester Call Order: Black before Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("broker", "Subject is a Broker", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("_wb", ": White-Black", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("_wh", ": White-Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("_bh", ": Black-Hispanic", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nnumattr", "Net Diff. in Num. Attributes Inquired About over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nnumskep", "Net Diff. in Num. Attributes Eliciting Skeptical Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("npctskep", "Net Diff. in Pct. of Attributes Raised Eliciting Skeptical Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nnumpos", "Net Diff. in Num. Attributes Eliciting Positive Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nnumneu", "Net Diff. in Num. Attributes Eliciting Neutral Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nnumneg", "Net Diff. in Num. Attributes Eliciting Negative Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("npctpos", "Net Diff. in Pct. of Attributes Raised Eliciting Positive Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("npctneu", "Net Diff. in Pct. of Attributes Raised Eliciting Neutral Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("npctneg", "Net Diff. in Pct. of Attributes Raised Eliciting Negative Reaction over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nanyskep", "Net Diff. in Receiving Any Skeptical Reaction to Attributes over Phone", con_tab_labs, fixed=TRUE)
con_tab_labs = gsub("nanyneg", "Net Diff. in Receiving Any Negative Reaction to Attributes over Phone", con_tab_labs, fixed=TRUE)

for(i in 1:length(Xs.num)){
  indpos=which(con_tab[,1]==Xs.num[i])
  con_tab[indpos,1] = con_tab_labs[i]
}

# produce version to display w/ kable() -- remove all rows w/ all NAs that are included for tex formatting
con_tab_rmd <- con_tab[apply(con_tab, 1, function(J) sum(is.na(J)) != ncol(con_tab) ),]
rownames(con_tab_rmd) <- NULL
kable(con_tab_rmd, caption="**Table A20**")
```


```{r, eval=T, echo=F, warning=F}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output

# format and export tex
con_tab_head = c("\\begin{center}",
                 "\\begin{longtable}{lcccccc}",
                 "\\caption[Balance Table, Continuous Covariates]{Balance Table for Continuous Covariates. Cells contain means and standard deviations in parentheses.} \\label{balance} \\\\",
                 "\\hline ",
                 " & \\multicolumn{2}{c}{Control} & \\multicolumn{2}{c}{Monitoring} & \\multicolumn{2}{c}{Punitive} \\\\",
                 "Variable & Unweighted & Weighted & Unweighted & Weighted & Unweighted & Weighted \\\\",
                 "\\hline ",
                 "\\endfirsthead",
                 "\\hline ",
                 " & \\multicolumn{2}{c}{Control} & \\multicolumn{2}{c}{Monitoring} & \\multicolumn{2}{c}{Punitive} \\\\",
                 "Variable & Unweighted & Weighted & Unweighted & Weighted & Unweighted & Weighted \\\\",
                 "\\hline ",
                 "\\endhead",
                 "\\hline \\multicolumn{7}{r}{(continued)} ",
                 "\\endfoot",
                 "\\hline \\hline",
                 "\\endlastfoot")

con_tab_end = c("\\end{longtable}", "\\end{center}")

con_tab_out = print(xtable(con_tab), include.rownames=FALSE)
con_tab_out = unlist(strsplit(con_tab_out, "\n"))

con_tab_out = gsub("Apartment Characteristics &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{I. Apartment Characteristics}",
                   con_tab_out, fixed=TRUE)

con_tab_out = gsub("Randomization Regime &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{II. Randomization Regime}",
                   con_tab_out, fixed=TRUE)

con_tab_out = gsub("Early Stage Discrimination &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{III. Early Stage Discrimination}",
                   con_tab_out, fixed=TRUE)

con_tab_out = gsub("Subject Characteristics &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{IV. Subject (Landlord/Broker) Characteristics}",
                   con_tab_out, fixed=TRUE)

con_tab_out = gsub("Tester Call Order &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{V. Tester Call Order}",
                   con_tab_out, fixed=TRUE)

con_tab_out = gsub("Testers Assumed Income &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{VI. Testers' Assumed Income}",
                   con_tab_out, fixed=TRUE)

con_tab_out = gsub("Tester Fixed Effects &  &  &  &  &  & ",
                   "\\multicolumn{7}{c}{VII. Tester Fixed Effects}",
                   con_tab_out, fixed=TRUE)

con_tab_out = c(con_tab_head,
                con_tab_out[9:369],
                con_tab_end)
cat(con_tab_out, file="out_tablea20_balance_table_continuousvars.tex", sep="\n")
```

## Table A21 (p. A-46): Sensitivity Analysis: Estimated Effects of Messaging on Net Discrimination Levels Among Subsample Excluding Likely Discrimination Cases

```{r tablea21, echo=T, warning=F}
col.labels <- c("Outcome",c("Estimate","SE","t","P"))

# ---------------------------------------------------------------------- #
# MODELS WITH ONLY BLOCK FIXED EFFECTS

itt.wb.mc <- matrix(NA, nrow=length(outvars.wb), ncol=5)
itt.wb.pc <- matrix(NA, nrow=length(outvars.wb), ncol=5)
itt.wb.pm <- matrix(NA, nrow=length(outvars.wb), ncol=5)

itt.wh.mc <- matrix(NA, nrow=length(outvars.wh), ncol=5)
itt.wh.pc <- matrix(NA, nrow=length(outvars.wh), ncol=5)
itt.wh.pm <- matrix(NA, nrow=length(outvars.wh), ncol=5)

itt.bh.mc <- matrix(NA, nrow=length(outvars.bh), ncol=5)
itt.bh.pc <- matrix(NA, nrow=length(outvars.bh), ncol=5)
itt.bh.pm <- matrix(NA, nrow=length(outvars.bh), ncol=5)

fit.wb.mc <- fit.wb.pc <- fit.wb.pm <- fit.wh.mc <- fit.wh.pc <- fit.wh.pm <- fit.bh.mc <- fit.bh.pc <- fit.bh.pm <- list() # store results from ITT est w/ block FE (no other covs)

for(i in 1:length(outvars.wb)){  
  
  model.mc <- paste(outvars.wb[i]," ~ TA1 + as.factor(block)",sep="")
  model.pc <- paste(outvars.wb[i]," ~ TA2 + as.factor(block)",sep="")
  model.pm <- paste(outvars.wb[i]," ~ TA2 + as.factor(block)",sep="")
  
  fit.wb.mc[[i]] <- fit.mc <- lm(formula=model.mc, data=dat[dat$TA %in% c(0,1) & dat$frame == "representative",], weights=ipw10)
  fit.wb.pc[[i]] <- fit.pc <- lm(formula=model.pc, data=dat[dat$TA %in% c(0,2) & dat$frame == "representative",], weights=ipw20)
  fit.wb.pm[[i]] <- fit.pm <- lm(formula=model.pm, data=dat[dat$TA %in% c(1,2) & dat$frame == "representative",], weights=ipw21)
  
  itt.wb.mc[i,2:5] <- summary(fit.mc)$coefficients[2,]
  itt.wb.pc[i,2:5] <- summary(fit.pc)$coefficients[2,]
  itt.wb.pm[i,2:5] <- summary(fit.pm)$coefficients[2,]
  
  itt.wb.mc[i,5] <- pt(coef(summary(fit.mc))[,3], summary(fit.mc)$df[2], lower=TRUE)[2] #one sided p
  itt.wb.pc[i,5] <- pt(coef(summary(fit.pc))[,3], summary(fit.pc)$df[2], lower=TRUE)[2] #one sided p
  
}

for(i in 1:length(outvars.wh)){  
  
  model.mc <- paste(outvars.wh[i]," ~ TA1 + as.factor(block)",sep="")
  model.pc <- paste(outvars.wh[i]," ~ TA2 + as.factor(block)",sep="")
  model.pm <- paste(outvars.wh[i]," ~ TA2 + as.factor(block)",sep="")
  
  fit.wh.mc[[i]] <- fit.mc <- lm(formula=model.mc, data=dat[dat$TA %in% c(0,1) & dat$frame == "representative",], weights=ipw10)
  fit.wh.pc[[i]] <- fit.pc <- lm(formula=model.pc, data=dat[dat$TA %in% c(0,2) & dat$frame == "representative",], weights=ipw20)
  fit.wh.pm[[i]] <- fit.pm <- lm(formula=model.pm, data=dat[dat$TA %in% c(1,2) & dat$frame == "representative",], weights=ipw21)
  
  itt.wh.mc[i,2:5] <- summary(fit.mc)$coefficients[2,]
  itt.wh.pc[i,2:5] <- summary(fit.pc)$coefficients[2,]
  itt.wh.pm[i,2:5] <- summary(fit.pm)$coefficients[2,]
  
  itt.wh.mc[i,5] <- pt(coef(summary(fit.mc))[,3], summary(fit.mc)$df[2], lower=TRUE)[2] #one sided p
  itt.wh.pc[i,5] <- pt(coef(summary(fit.pc))[,3], summary(fit.pc)$df[2], lower=TRUE)[2] #one sided p
  
}

for(i in 1:length(outvars.bh)){  
  
  model.mc <- paste(outvars.bh[i]," ~ TA1 + as.factor(block)",sep="")
  model.pc <- paste(outvars.bh[i]," ~ TA2 + as.factor(block)",sep="")
  model.pm <- paste(outvars.bh[i]," ~ TA2 + as.factor(block)",sep="")
  
  fit.bh.mc[[i]] <- fit.mc <- lm(formula=model.mc, data=dat[dat$TA %in% c(0,1) & dat$frame == "representative",], weights=ipw10)
  fit.bh.pc[[i]] <- fit.pc <- lm(formula=model.pc, data=dat[dat$TA %in% c(0,2) & dat$frame == "representative",], weights=ipw20)
  fit.bh.pm[[i]] <- fit.pm <- lm(formula=model.pm, data=dat[dat$TA %in% c(1,2) & dat$frame == "representative",], weights=ipw21)
  
  itt.bh.mc[i,2:5] <- summary(fit.mc)$coefficients[2,]
  itt.bh.pc[i,2:5] <- summary(fit.pc)$coefficients[2,]
  itt.bh.pm[i,2:5] <- summary(fit.pm)$coefficients[2,]
  
}

# Add outcome measure labels
itt.wb.mc[,1] <- outvars.wb.labs
itt.wb.pc[,1] <- outvars.wb.labs
itt.wb.pm[,1] <- outvars.wb.labs

itt.wh.mc[,1] <- outvars.wh.labs
itt.wh.pc[,1] <- outvars.wh.labs
itt.wh.pm[,1] <- outvars.wh.labs

itt.bh.mc[,1] <- outvars.bh.labs
itt.bh.pc[,1] <- outvars.bh.labs
itt.bh.pm[,1] <- outvars.bh.labs

# Add column names
colnames(itt.wb.mc) <- colnames(itt.wb.pc) <- colnames(itt.wb.pm) <- col.labels
colnames(itt.wh.mc) <- colnames(itt.wh.pc) <- colnames(itt.wh.pm) <- col.labels
colnames(itt.bh.mc) <- colnames(itt.bh.pc) <- colnames(itt.bh.pm) <- col.labels

# Stack, create output table
out2 <- rbind(itt.wb.mc, itt.wh.mc, itt.bh.mc,
              itt.wb.pc, itt.wh.pc, itt.bh.pc,
              itt.wb.pm, itt.wh.pm, itt.bh.pm)

out2 <- out2[-seq(1,33,4),]

#----- CIs -----#

df.wb.mc <- unlist(lapply(fit.wb.mc, function(x) summary(x)$df[2]))[2:4]
df.wb.pc <- unlist(lapply(fit.wb.pc, function(x) summary(x)$df[2]))[2:4]
df.wb.pm <- unlist(lapply(fit.wb.pm, function(x) summary(x)$df[2]))[2:4]

df.wh.mc <- unlist(lapply(fit.wh.mc, function(x) summary(x)$df[2]))[2:4]
df.wh.pc <- unlist(lapply(fit.wh.pc, function(x) summary(x)$df[2]))[2:4]
df.wh.pm <- unlist(lapply(fit.wh.pm, function(x) summary(x)$df[2]))[2:4]

df.bh.mc <- unlist(lapply(fit.bh.mc, function(x) summary(x)$df[2]))[2:4]
df.bh.pc <- unlist(lapply(fit.bh.pc, function(x) summary(x)$df[2]))[2:4]
df.bh.pm <- unlist(lapply(fit.bh.pm, function(x) summary(x)$df[2]))[2:4]

df.2g.10 <- c(df.wb.mc, df.wh.mc, df.bh.mc)
df.2g.20 <- c(df.wb.pc, df.wh.pc, df.bh.pc)
df.2g.21 <- c(df.wb.pm, df.wh.pm, df.bh.pm)

cv.2g.10 <- qt(p=0.025, df=df.2g.10, lower.tail=TRUE)
cv.2g.20 <- qt(p=0.025, df=df.2g.20, lower.tail=TRUE)
cv.2g.21 <- qt(p=0.025, df=df.2g.21, lower.tail=TRUE)

cv.2g <- c(cv.2g.10, cv.2g.20, cv.2g.21)

# Assemble vector of 95% CIs
itt.2g.ci <- as.numeric(out2[,2]) + abs(cv.2g)*cbind(-as.numeric(out2[,3]), as.numeric(out2[,3]))
table_a21_cis <- apply(itt.2g.ci, 1, function(x) paste("[", round(x[1],3) ,", ", round(x[2],3), "]", sep=""))
# Round and format results
for(i in 2:ncol(out2)) out2[,i] <- round(as.numeric(out2[,i]),3)
for(i in 5) out2[,i] <- paste("(",out2[,i],")",sep="")
# Attach CIs
table_a21 <- cbind(out2, table_a21_cis)
# Clean up column names
colnames(table_a21) <- c("Outcome", "Estimate", "SE", "t", "p-value", "95% CI")
# Save unformatted table
write.csv(table_a21, "out_tablea21_SA_ExcludeLD_itt_2g_blockfe_nocovs_UNFORMATTED.csv", row.names=FALSE)

# display table in Rmd output file
kable(table_a21[1:9,], caption="**Table A21, Panel I. Monitoring vs. Control**")
kable(table_a21[10:18,], caption="**Table A21, Panel II. Punitive vs. Control**")
kable(table_a21[19:27,], caption="**Table A21, Panel III. Punitive vs. Monitoring**")
```

```{r echo=F, eval=T, warning=F}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# Format table

start.rows <- seq(1,27,3)
stop.rows <- start.rows + 2
#start.rows; stop.rows


Ylabs <- c("Index measure of favorable in-person interactions",
           "Received post-visit callback",
           "Received post-visit offer")
Ylabs <- rep(Ylabs, 9)
#Ylabs

seclabs <- c("I. Monitoring vs. Control", "II. Punitive vs. Control", "III. Punitive vs. Monitoring")
sublabs <- rep(c("A. White vs. Black", "B. White vs. Hispanic", "C. Black vs. Hispanic"), 3)


tab.out2 <- list()
seclabs.index <- 0
for(i in 1:length(start.rows)){
  if(i %in% c(1,4,7)){
    seclabs.index <- seclabs.index + 1
    tab.out2[[i]] <- rbind(c(seclabs[seclabs.index], rep(NA,5)),
                           c(sublabs[i], rep(NA,5)),
                           table_a21[start.rows[i]:stop.rows[i],])        
  } else {
    tab.out2[[i]] <- rbind(c(sublabs[i], rep(NA,5)), table_a21[start.rows[i]:stop.rows[i],])    
  }
}

tab.out2 <- do.call(rbind, tab.out2)

out <- print(xtable(tab.out2, align="llrrrrr", digits=3), include.rownames=FALSE)
out <- unlist(strsplit(out, "\n"))

out <- gsub("A. White vs. Black", "\\underline{A. White vs. Black}", out, fixed=TRUE)
out <- gsub("B. White vs. Hispanic", "\\underline{B. White vs. Hispanic}", out, fixed=TRUE)
out <- gsub("C. Black vs. Hispanic", "\\underline{C. Black vs. Hispanic}", out, fixed=TRUE)
out <- gsub("I. Monitoring vs. Control &  &  &  &  & ", "\\multicolumn{6}{c}{I. Monitoring vs. Control}", out, fixed=TRUE)
out <- gsub("II. Punitive vs. Control &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{II. Punitive vs. Control}", out, fixed=TRUE)
out <- gsub("III. Punitive vs. Monitoring &  &  &  &  & ", "\\hline \\multicolumn{6}{c}{III. Punitive vs. Monitoring}", out, fixed=TRUE)

out <- gsub("(White vs. Black)", "", out, fixed=TRUE)
out <- gsub("(White vs. Hispanic)", "", out, fixed=TRUE)
out <- gsub("(Black vs. Hispanic)", "", out, fixed=TRUE)

out <- c(out[3:4],
         "\\resizebox{.8\\textwidth}{!}{",
         out[5:49],
         "}",
         "\\caption[Estimated Effects of Messaging on Net Discrimination Levels Among Subsample Excluding Likely Discrimination Cases]{Estimated Effects of Messaging on Net Discrimination Levels Among Subsample Excluding Likely Discrimination Cases. Cells contain ITT estimates from OLS models with inverse probability weights and block fixed effects. For each reference group versus comparison group pairing, outcomes are net discrimination measures against the comparison group relative to the reference group. Estimated effects that are positive (negative) are interpreted as increases (decreases) in net discrimination against the comparison group relative to the reference group. Estimated p-values are reported in parentheses; p-values correspond to a one-sided test of the null hypothesis of equality of means for the monitoring-control and punitive-control comparisons, and to a two-sided test of the null hypothesis of equality of means for the punitive-monitoring comparison and for all analyses involving net discrimination against Hispanic (vs. black) testers.}",
         "\\label{ittnocov}",
         out[50])

cat(out, file="out_tablea21_SA_ExcludeLD_itt_2g_blockfe_nocovs.tex", sep="\n")
write.csv(tab.out2, file="out_tablea21_SA_ExcludeLD_itt_2g_blockfe_nocovs.csv", row.names=FALSE)
```

## Table A22 (p. A-47): Distribution of Subjects by their Perceived Race

```{r tablea22, echo=T, warning=F}
# with(dat, table(primary_wht, primary_blk)) # 3 are both black and white
# with(dat, table(primary_wht, primary_hsp)) # 8 are both hispanic and white
# with(dat, table(primary_wht, primary_api)) # 4 are both asian and white
dat$subj_race <- rep(NA, nrow(dat))
dat$subj_race <- ifelse(dat$primary_wht == 1 & dat$primary_blk != 1 & dat$primary_api != 1 & dat$primary_hsp != 1, "White", dat$subj_race)
dat$subj_race <- ifelse(dat$primary_wht != 1 & dat$primary_blk == 1 & dat$primary_api != 1 & dat$primary_hsp != 1, "Black", dat$subj_race)
dat$subj_race <- ifelse(dat$primary_wht != 1 & dat$primary_blk != 1 & dat$primary_api != 1 & dat$primary_hsp == 1, "Hispanic", dat$subj_race)
dat$subj_race <- ifelse(is.na(dat$subj_race), "Other", dat$subj_race)

tab_pcvdrace <- cbind(table(dat$subj_race), round(table(dat$subj_race)/nrow(dat), 2))
colnames(tab_pcvdrace) <- c("N", "Proportion")
pcvrace <- tab_pcvdrace
kable(tab_pcvdrace, caption="**Table A22**", col.names=c("Number of Subjects", "Proportion"))
```

```{r echo=F, eval=T, warning=F}
##### NOTE: set eval=T to export tex; eval=F suppresses tex output in the Rmd output
# export results to tex
tab_pcvdrace <- print(xtable(tab_pcvdrace), include.rownames=TRUE)
tab_pcvdrace <- unlist(strsplit(tab_pcvdrace, "\n"))

tab_pcvdrace <- c(tab_pcvdrace[1:4],
                  "\\caption{Distribution of Subjects by their Perceived Race. A subject is classified as Black, Hispanic, or White if at least two testers in a matched trio perceive them to belong to that racial group. All other subjects are classified in the Other category.}\\label{pcvrace}",
                  "\\begin{tabular}{lrr}",
                  tab_pcvdrace[6],
                  "Subject's Perceived Race & Number of Subjects & Proportion \\\\",
                  tab_pcvdrace[8:15])

cat(tab_pcvdrace, file="out_tablea22_perceived_race_of_subjects.tex", sep="\n")
```

## Figure A6 (p. A-48): Estimated Effects of Monitoring Messaging on Net Discrimination Levels Relative to Control, by the Perceived Race of the Landlord/Broker

```{r figurea6, echo=T, warning=F, fig.width=11, fig.height=7}
# monitoring-control
fit_mc_llrace <- do.call(rbind, 
                         lapply(names(table(dat$subj_race)), function(J){
                           x<-rbind(summary(lm(ncb_wb ~ TA1 + factor(block), data=dat, subset=(TA %in% c(0,1) & subj_race==J), weights=ipw10))$coefficients["TA1",],
                                    summary(lm(ncb_wh ~ TA1 + factor(block), data=dat, subset=(TA %in% c(0,1) & subj_race==J), weights=ipw10))$coefficients["TA1",],
                                    summary(lm(noff_wb ~ TA1 + factor(block), data=dat, subset=(TA %in% c(0,1) & subj_race==J), weights=ipw10))$coefficients["TA1",],
                                    summary(lm(noff_wh ~ TA1 + factor(block), data=dat, subset=(TA %in% c(0,1) & subj_race==J), weights=ipw10))$coefficients["TA1",]
                           )
                           x<-data.frame(Pair = c("Net Discrimination\nAgainst Blacks (vs. Whites)", 
                                                  "Net Discrimination\nAgainst Hispanics (vs. Whites)", 
                                                  "Net Discrimination\nAgainst Blacks (vs. Whites)", 
                                                  "Net Discrimination\nAgainst Hispanics (vs. Whites)"),
                                         Outcome = c("Callback", "Callback", "Offer", "Offer"),
                                         SubjectRace = J,
                                         x,
                                         stringsAsFactors=FALSE)
                           return(x)
                         }))
# table(dat$subj_race[dat$TA %in% c(0,1)])
fit_mc_llrace$SubjectRace <- ifelse(fit_mc_llrace$SubjectRace == "Black", "Black\n(n=48)", fit_mc_llrace$SubjectRace)
fit_mc_llrace$SubjectRace <- ifelse(fit_mc_llrace$SubjectRace == "Hispanic", "Hispanic\n(n=61)", fit_mc_llrace$SubjectRace)
fit_mc_llrace$SubjectRace <- ifelse(fit_mc_llrace$SubjectRace == "Other", "Other\n(n=115)", fit_mc_llrace$SubjectRace)
fit_mc_llrace$SubjectRace <- ifelse(fit_mc_llrace$SubjectRace == "White", "White\n(n=229)", fit_mc_llrace$SubjectRace)

fit_mc_llrace$SubjectRace <- factor(fit_mc_llrace$SubjectRace, levels=c("White\n(n=229)", "Black\n(n=48)", "Hispanic\n(n=61)", "Other\n(n=115)"))

ggplot(fit_mc_llrace, aes(group=SubjectRace, colour=SubjectRace)) + 
  geom_linerange(aes(x=Outcome, ymin=Estimate-1.96*Std..Error, ymax=Estimate+1.96*Std..Error), lwd=1, position=position_dodge(width=1/2)) +
  geom_linerange(aes(x=Outcome, ymin=Estimate-1.64*Std..Error, ymax=Estimate+1.64*Std..Error), lwd=2, position=position_dodge(width=1/2)) +
  geom_point(aes(x=Outcome, y=Estimate), size=3.5, position=position_dodge(width=1/2)) + 
  scale_y_continuous(limits = c(-.5, .5), breaks=seq(-.5, .5, .1)) +
  xlab("\nNet Discrimination Outcome") +
  ylab("\nEstimated Effect of Sending\nMonitoring Message (vs. Control)\n") +
  geom_hline(yintercept=0, lty=2) + facet_wrap( ~ Pair) +
  scale_colour_brewer(palette = "Dark2", guide = guide_legend(title = "Perceived Race of Landlord/Broker:")) +
  theme_bw(base_size=15) + theme(legend.position="bottom", legend.key = element_blank())
ggsave("out_figurea6_appx_hetfxSUBJRACE_mc.pdf", width=11, height=7)
```

## Figure A7 (p. A-49): Estimated Effects of Punitive Messaging on Net Discrimination Levels Relative to Control, by the Perceived Race of the Landlord/Broker

```{r figurea7, echo=T, warning=F, fig.width=11, fig.height=7}
# punitive-control
fit_pc_llrace <- do.call(rbind, 
                         lapply(names(table(dat$subj_race)), function(J){
                           x<-rbind(summary(lm(ncb_wb ~ TA2 + factor(block), data=dat, subset=(TA %in% c(0,2) & subj_race==J), weights=ipw20))$coefficients["TA2",],
                                    summary(lm(ncb_wh ~ TA2 + factor(block), data=dat, subset=(TA %in% c(0,2) & subj_race==J), weights=ipw20))$coefficients["TA2",],
                                    summary(lm(noff_wb ~ TA2 + factor(block), data=dat, subset=(TA %in% c(0,2) & subj_race==J), weights=ipw20))$coefficients["TA2",],
                                    summary(lm(noff_wh ~ TA2 + factor(block), data=dat, subset=(TA %in% c(0,2) & subj_race==J), weights=ipw20))$coefficients["TA2",]
                           )
                           x<-data.frame(Pair = c("Net Discrimination\nAgainst Blacks (vs. Whites)", 
                                                  "Net Discrimination\nAgainst Hispanics (vs. Whites)", 
                                                  "Net Discrimination\nAgainst Blacks (vs. Whites)", 
                                                  "Net Discrimination\nAgainst Hispanics (vs. Whites)"),
                                         Outcome = c("Callback", "Callback", "Offer", "Offer"),
                                         SubjectRace = J,
                                         x,
                                         stringsAsFactors=FALSE)
                           return(x)
                         }))
# table(dat$subj_race[dat$TA %in% c(0,2)])
fit_pc_llrace$SubjectRace <- ifelse(fit_pc_llrace$SubjectRace == "Black", "Black\n(n=59)", fit_pc_llrace$SubjectRace)
fit_pc_llrace$SubjectRace <- ifelse(fit_pc_llrace$SubjectRace == "Hispanic", "Hispanic\n(n=59)", fit_pc_llrace$SubjectRace)
fit_pc_llrace$SubjectRace <- ifelse(fit_pc_llrace$SubjectRace == "Other", "Other\n(n=112)", fit_pc_llrace$SubjectRace)
fit_pc_llrace$SubjectRace <- ifelse(fit_pc_llrace$SubjectRace == "White", "White\n(n=249)", fit_pc_llrace$SubjectRace)

fit_pc_llrace$SubjectRace <- factor(fit_pc_llrace$SubjectRace, levels=c("White\n(n=249)", "Black\n(n=59)", "Hispanic\n(n=59)", "Other\n(n=112)"))

ggplot(fit_pc_llrace, aes(group=SubjectRace, colour=SubjectRace)) + 
  geom_linerange(aes(x=Outcome, ymin=Estimate-1.96*Std..Error, ymax=Estimate+1.96*Std..Error), lwd=1, position=position_dodge(width=1/2)) +
  geom_linerange(aes(x=Outcome, ymin=Estimate-1.64*Std..Error, ymax=Estimate+1.64*Std..Error), lwd=2, position=position_dodge(width=1/2)) +
  geom_point(aes(x=Outcome, y=Estimate), size=3.5, position=position_dodge(width=1/2)) + 
  scale_y_continuous(limits = c(-.6, .6), breaks=seq(-.8, .8, .1)) +
  xlab("\nNet Discrimination Outcome") +
  ylab("\nEstimated Effect of Sending\nPunitive Message (vs. Control)\n") +
  geom_hline(yintercept=0, lty=2) + facet_wrap( ~ Pair) +
  scale_colour_brewer(palette = "Dark2", guide = guide_legend(title = "Perceived Race of Landlord/Broker:")) +
  theme_bw(base_size=15) + theme(legend.position="bottom", legend.key = element_blank())
ggsave("out_figurea7_appx_hetfxSUBJRACE_pc.pdf", width=11, height=7)

```

## Appendix E.15 (p. A-50): Analyses Addressing Spillover Concerns

For transparency, we show the code we use to conduct the analyses reported in Appendix E.15. However, we have omitted the raw data files that are used to conduct these analyses (i.e., every scraped ad from Craigslist) because they contain personal identifying information about human subjects.

```{r appendixe15, eval=F, echo=T}
setwd("") # set to directory containing all scraped Craigslist ads (before sampling)
# dirs1 - subdirectory listing, one per day

phone <- NULL # one row for each ad
phone_nums <- NULL 
for(i in 1:length(dirs1)) {
  
  thisdir <- dirs1[i]
  subdirs <- list.files(thisdir)
  subdirs <- subdirs[which(!grepl(".csv", subdirs))]
  
  if(length(subdirs) > 0) {
    
    subphone <- array(NA, length(subdirs))
    entry <- array(NA, length(subdirs))
    for(j in 1:length(subdirs)) { # one subdir per listing each day
      
      listing <- list.files(paste0(thisdir, "/", subdirs[j]))
      listing <- listing[which(grepl(".html", listing))]
      if(length(listing) > 1) { listing <- listing[1] }
      list_source <- readLines(paste0(thisdir, "/", subdirs[j], "/", listing))
      
      entry[j] <- paste(thisdir, subdirs[j], sep = "_")
      subphone[j] <- ifelse(length(which(str_extract_all(list_source, 
                                                         "\\(?\\d{3}\\)?[.-]? *\\d{3}[[:space:]]*[.-]+ *[.-[:space:]]?\\d{4}",
                                                         simplify = TRUE) != "")) > 0, 1, 0)
      if(subphone[j] == 1) {
        phone_vec <- str_extract_all(list_source, 
                                     "\\(?\\d{3}\\)?[.-]? *\\d{3}[[:space:]]*[.-]+ *[.-[:space:]]?\\d{4}",
                                     simplify = TRUE)
        phone_nums <- c(phone_nums, unique(phone_vec[which(phone_vec != "")])) # keep all phone numbers here (not available)
      }
      
      
    }
    phone <- rbind(phone, cbind(entry, subphone))
    
  }
  
}

phone_df <- as.data.frame(phone)
names(phone_df)[2] <- "hasphonenum"
phone_df$hasphonenum <- as.numeric(as.character(phone_df$hasphonenum))

phone_df
# # A tibble: 85,981 x 2
#             entry hasphonenum
#             <chr>       <dbl>
# 1   2012-04-13_1           1
# 2  2012-04-13_10           1
# 3 2012-04-13_100           0
# 4 2012-04-13_101           1
# 5 2012-04-13_102           1
# 6 2012-04-13_103           1
# 7 2012-04-13_104           0
# 8 2012-04-13_105           1
# 9 2012-04-13_106           0
# 10 2012-04-13_107          1
# # ... with 85,971 more rows


# > length(unique(phone_df$entry))
# [1] 85981
# > sum(phone_df$hasphonenum)
# [1] 47978
# > mean(phone_df$hasphonenum) ### estimated proportion of rental listings requiring contact by phone 
# [1] 0.558007

### probability a subject enters the audit sample
2711 / 85981
# [1] 0.03153022
### probability a subject enters the experiment sample
653 / 85981
# [1] 0.007594701

### estimated percentage of duplicate-subject listings by phone number
# > length(which(duplicated(phone_nums)))/length(phone_nums) # 78.96%

0.7896 * 85981
# [1] 67890.6
85981-67890
# [1] 18091
2711 / 18091
# [1] 0.1498535
653 / 18091
# [1] 0.0360953


### Capture-recapture analysis ###

phone_df <- tbl_df(as.data.frame(phone[which(phone[,2] == "1"),])) # keep only listings with phone numbers
phone_df <- phone_df[which(!duplicated(phone_nums)),] # de-dupe using list of phone numbers from above (not available)

# sample 1 - which are marked

set.seed(9382)

phone_s1 <- sample(phone_df$entry, 1000)
#sum(as.numeric(as.character(phone_s1))) # 538

# sample 2

phone_s2 <- sample(phone_df$entry, 1000)

length(which(phone_s2 %in% phone_s1)) # 95

# estimate pop

1000 / (95/1000) # [1] 10526.32

# using 55.8% est. with phone #, this means:

10526.32 / .558 # 18864.37
```

## Appendix E.16 (p. A-51): Joint Distribution of the Number of Testers in Matched Trios Who Receive a Callback and an Offer

```{r appendixe16, eval=T, echo=T, warning=F}
dat$num_cb <- apply(dat[,c("cb_A", "cb_B", "cb_C")], 1, sum)
dat$num_off <- apply(dat[,c("off_A", "off_B", "off_C")], 1, sum)

appxe16 <- with(dat, table(num_cb, num_off))

appxe16a <- cbind(appxe16, rowSums(appxe16))
appxe16b <- round(cbind(appxe16, rowSums(appxe16)) / rowSums(appxe16) * 100, 2)

appxe16_table <- list()
for(i in 1:nrow(appxe16a)) {
  appxe16_table[[i]] <- rbind(as.character(round(appxe16a[i,], 0)),
                               paste0("(", sprintf("%.2f", appxe16b[i,]) ,")"))
}
appxe16_table <- do.call(rbind, appxe16_table)
appxe16_table <- cbind(c("Number of Testers Receiving a Callback: 0", "",
                         "Number of Testers Receiving a Callback: 1", "",
                         "Number of Testers Receiving a Callback: 2", "",
                         "Number of Testers Receiving a Callback: 3", ""),
                       appxe16_table)

colnames(appxe16_table) <- c("", paste(rep("Number of Testers Receiving an Offer:", 4), 0:3), "Row Totals")
  
kable(appxe16_table, caption="**Appendix E.16: Number of Testers in a Matched Trio Receiving an Offer by the Number of Testers in a Matched Trio Receiving a Callback.** Cells report counts with percentages in parentheses.")
```



